r263048 - ARM & AArch64: convert asm tests to LLVM IR and restrict optimizations.
Tim Northover via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 9 10:54:43 PST 2016
Modified: cfe/trunk/test/CodeGen/arm_neon_intrinsics.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm_neon_intrinsics.c?rev=263048&r1=263047&r2=263048&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm_neon_intrinsics.c (original)
+++ cfe/trunk/test/CodeGen/arm_neon_intrinsics.c Wed Mar 9 12:54:42 2016
@@ -1,1611 +1,2446 @@
// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\
-// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\
-// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SWIFT
-// RUN: %clang_cc1 -triple armv8-linux-gnu \
-// RUN: -target-cpu cortex-a57 -mfloat-abi soft -ffreestanding -Os -S -o - %s\
-// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-A57
+// RUN: -target-cpu swift -ffreestanding -emit-llvm -S -o - %s\
+// RUN: opt -S -mem2reg | FileCheck %s
// REQUIRES: long_tests
#include <arm_neon.h>
-// CHECK-LABEL: test_vaba_s8
-// CHECK: vaba.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vaba_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
+// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_I_I]]
+// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
return vaba_s8(a, b, c);
}
-// CHECK-LABEL: test_vaba_s16
-// CHECK: vaba.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vaba_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD2_I_I]]
+// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
return vaba_s16(a, b, c);
}
-// CHECK-LABEL: test_vaba_s32
-// CHECK: vaba.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vaba_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD2_I_I]]
+// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
return vaba_s32(a, b, c);
}
-// CHECK-LABEL: test_vaba_u8
-// CHECK: vaba.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vaba_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
+// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_I_I]]
+// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
return vaba_u8(a, b, c);
}
-// CHECK-LABEL: test_vaba_u16
-// CHECK: vaba.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vaba_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD2_I_I]]
+// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vaba_u16(a, b, c);
}
-// CHECK-LABEL: test_vaba_u32
-// CHECK: vaba.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vaba_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD2_I_I]]
+// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vaba_u32(a, b, c);
}
-// CHECK-LABEL: test_vabaq_s8
-// CHECK: vaba.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vabaq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %b, <16 x i8> %c) #4
+// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABD_I_I]]
+// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
return vabaq_s8(a, b, c);
}
-// CHECK-LABEL: test_vabaq_s16
-// CHECK: vaba.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vabaq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VABD2_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
return vabaq_s16(a, b, c);
}
-// CHECK-LABEL: test_vabaq_s32
-// CHECK: vaba.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vabaq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VABD2_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
return vabaq_s32(a, b, c);
}
-// CHECK-LABEL: test_vabaq_u8
-// CHECK: vaba.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vabaq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %b, <16 x i8> %c) #4
+// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABD_I_I]]
+// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
return vabaq_u8(a, b, c);
}
-// CHECK-LABEL: test_vabaq_u16
-// CHECK: vaba.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vabaq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VABD2_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
return vabaq_u16(a, b, c);
}
-// CHECK-LABEL: test_vabaq_u32
-// CHECK: vaba.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vabaq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VABD2_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
return vabaq_u32(a, b, c);
}
-// CHECK-LABEL: test_vabal_s8
-// CHECK: vabal.s8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
return vabal_s8(a, b, c);
}
-// CHECK-LABEL: test_vabal_s16
-// CHECK: vabal.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vabal_s16(a, b, c);
}
-// CHECK-LABEL: test_vabal_s32
-// CHECK: vabal.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vabal_s32(a, b, c);
}
-// CHECK-LABEL: test_vabal_u8
-// CHECK: vabal.u8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
return vabal_u8(a, b, c);
}
-// CHECK-LABEL: test_vabal_u16
-// CHECK: vabal.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vabal_u16(a, b, c);
}
-// CHECK-LABEL: test_vabal_u32
-// CHECK: vabal.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vabal_u32(a, b, c);
}
-// CHECK-LABEL: test_vabd_s8
-// CHECK: vabd.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vabd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VABD_I]]
int8x8_t test_vabd_s8(int8x8_t a, int8x8_t b) {
return vabd_s8(a, b);
}
-// CHECK-LABEL: test_vabd_s16
-// CHECK: vabd.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vabd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4
+// CHECK: ret <4 x i16> [[VABD2_I]]
int16x4_t test_vabd_s16(int16x4_t a, int16x4_t b) {
return vabd_s16(a, b);
}
-// CHECK-LABEL: test_vabd_s32
-// CHECK: vabd.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vabd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4
+// CHECK: ret <2 x i32> [[VABD2_I]]
int32x2_t test_vabd_s32(int32x2_t a, int32x2_t b) {
return vabd_s32(a, b);
}
-// CHECK-LABEL: test_vabd_u8
-// CHECK: vabd.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vabd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VABD_I]]
uint8x8_t test_vabd_u8(uint8x8_t a, uint8x8_t b) {
return vabd_u8(a, b);
}
-// CHECK-LABEL: test_vabd_u16
-// CHECK: vabd.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vabd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4
+// CHECK: ret <4 x i16> [[VABD2_I]]
uint16x4_t test_vabd_u16(uint16x4_t a, uint16x4_t b) {
return vabd_u16(a, b);
}
-// CHECK-LABEL: test_vabd_u32
-// CHECK: vabd.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vabd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4
+// CHECK: ret <2 x i32> [[VABD2_I]]
uint32x2_t test_vabd_u32(uint32x2_t a, uint32x2_t b) {
return vabd_u32(a, b);
}
-// CHECK-LABEL: test_vabd_f32
-// CHECK: vabd.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vabd_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]]) #4
+// CHECK: ret <2 x float> [[VABD2_I]]
float32x2_t test_vabd_f32(float32x2_t a, float32x2_t b) {
return vabd_f32(a, b);
}
-// CHECK-LABEL: test_vabdq_s8
-// CHECK: vabd.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vabdq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VABD_I]]
int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) {
return vabdq_s8(a, b);
}
-// CHECK-LABEL: test_vabdq_s16
-// CHECK: vabd.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vabdq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4
+// CHECK: ret <8 x i16> [[VABD2_I]]
int16x8_t test_vabdq_s16(int16x8_t a, int16x8_t b) {
return vabdq_s16(a, b);
}
-// CHECK-LABEL: test_vabdq_s32
-// CHECK: vabd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vabdq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4
+// CHECK: ret <4 x i32> [[VABD2_I]]
int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) {
return vabdq_s32(a, b);
}
-// CHECK-LABEL: test_vabdq_u8
-// CHECK: vabd.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vabdq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VABD_I]]
uint8x16_t test_vabdq_u8(uint8x16_t a, uint8x16_t b) {
return vabdq_u8(a, b);
}
-// CHECK-LABEL: test_vabdq_u16
-// CHECK: vabd.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vabdq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4
+// CHECK: ret <8 x i16> [[VABD2_I]]
uint16x8_t test_vabdq_u16(uint16x8_t a, uint16x8_t b) {
return vabdq_u16(a, b);
}
-// CHECK-LABEL: test_vabdq_u32
-// CHECK: vabd.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4
+// CHECK: ret <4 x i32> [[VABD2_I]]
uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) {
return vabdq_u32(a, b);
}
-// CHECK-LABEL: test_vabdq_f32
-// CHECK: vabd.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> [[VABD_I]], <4 x float> [[VABD1_I]]) #4
+// CHECK: ret <4 x float> [[VABD2_I]]
float32x4_t test_vabdq_f32(float32x4_t a, float32x4_t b) {
return vabdq_f32(a, b);
}
-// CHECK-LABEL: test_vabdl_s8
-// CHECK: vabdl.s8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
return vabdl_s8(a, b);
}
-// CHECK-LABEL: test_vabdl_s16
-// CHECK: vabdl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
return vabdl_s16(a, b);
}
-// CHECK-LABEL: test_vabdl_s32
-// CHECK: vabdl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
return vabdl_s32(a, b);
}
-// CHECK-LABEL: test_vabdl_u8
-// CHECK: vabdl.u8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
return vabdl_u8(a, b);
}
-// CHECK-LABEL: test_vabdl_u16
-// CHECK: vabdl.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
return vabdl_u16(a, b);
}
-// CHECK-LABEL: test_vabdl_u32
-// CHECK: vabdl.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
return vabdl_u32(a, b);
}
-// CHECK-LABEL: test_vabs_s8
-// CHECK: vabs.s8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 {
+// CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %a) #4
+// CHECK: ret <8 x i8> [[VABS_I]]
int8x8_t test_vabs_s8(int8x8_t a) {
return vabs_s8(a);
}
-// CHECK-LABEL: test_vabs_s16
-// CHECK: vabs.s16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> [[VABS_I]]) #4
+// CHECK: ret <4 x i16> [[VABS1_I]]
int16x4_t test_vabs_s16(int16x4_t a) {
return vabs_s16(a);
}
-// CHECK-LABEL: test_vabs_s32
-// CHECK: vabs.s32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> [[VABS_I]]) #4
+// CHECK: ret <2 x i32> [[VABS1_I]]
int32x2_t test_vabs_s32(int32x2_t a) {
return vabs_s32(a);
}
-// CHECK-LABEL: test_vabs_f32
-// CHECK: vabs.f32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vabs_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[VABS_I]]) #4
+// CHECK: ret <2 x float> [[VABS1_I]]
float32x2_t test_vabs_f32(float32x2_t a) {
return vabs_f32(a);
}
-// CHECK-LABEL: test_vabsq_s8
-// CHECK: vabs.s8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 {
+// CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %a) #4
+// CHECK: ret <16 x i8> [[VABS_I]]
int8x16_t test_vabsq_s8(int8x16_t a) {
return vabsq_s8(a);
}
-// CHECK-LABEL: test_vabsq_s16
-// CHECK: vabs.s16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> [[VABS_I]]) #4
+// CHECK: ret <8 x i16> [[VABS1_I]]
int16x8_t test_vabsq_s16(int16x8_t a) {
return vabsq_s16(a);
}
-// CHECK-LABEL: test_vabsq_s32
-// CHECK: vabs.s32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> [[VABS_I]]) #4
+// CHECK: ret <4 x i32> [[VABS1_I]]
int32x4_t test_vabsq_s32(int32x4_t a) {
return vabsq_s32(a);
}
-// CHECK-LABEL: test_vabsq_f32
-// CHECK: vabs.f32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vabsq_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VABS_I]]) #4
+// CHECK: ret <4 x float> [[VABS1_I]]
float32x4_t test_vabsq_f32(float32x4_t a) {
return vabsq_f32(a);
}
-// CHECK-LABEL: test_vadd_s8
-// CHECK: vadd.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b
+// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t a, int8x8_t b) {
return vadd_s8(a, b);
}
-// CHECK-LABEL: test_vadd_s16
-// CHECK: vadd.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b
+// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t a, int16x4_t b) {
return vadd_s16(a, b);
}
-// CHECK-LABEL: test_vadd_s32
-// CHECK: vadd.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b
+// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t a, int32x2_t b) {
return vadd_s32(a, b);
}
-// CHECK-LABEL: test_vadd_s64
-// CHECK: vadd.i64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b
+// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t a, int64x1_t b) {
return vadd_s64(a, b);
}
-// CHECK-LABEL: test_vadd_f32
-// CHECK: vadd.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vadd_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, %b
+// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t a, float32x2_t b) {
return vadd_f32(a, b);
}
-// CHECK-LABEL: test_vadd_u8
-// CHECK: vadd.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b
+// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t a, uint8x8_t b) {
return vadd_u8(a, b);
}
-// CHECK-LABEL: test_vadd_u16
-// CHECK: vadd.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b
+// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t a, uint16x4_t b) {
return vadd_u16(a, b);
}
-// CHECK-LABEL: test_vadd_u32
-// CHECK: vadd.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b
+// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t a, uint32x2_t b) {
return vadd_u32(a, b);
}
-// CHECK-LABEL: test_vadd_u64
-// CHECK: vadd.i64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b
+// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t a, uint64x1_t b) {
return vadd_u64(a, b);
}
-// CHECK-LABEL: test_vaddq_s8
-// CHECK: vadd.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b
+// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t a, int8x16_t b) {
return vaddq_s8(a, b);
}
-// CHECK-LABEL: test_vaddq_s16
-// CHECK: vadd.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t a, int16x8_t b) {
return vaddq_s16(a, b);
}
-// CHECK-LABEL: test_vaddq_s32
-// CHECK: vadd.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t a, int32x4_t b) {
return vaddq_s32(a, b);
}
-// CHECK-LABEL: test_vaddq_s64
-// CHECK: vadd.i64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b
+// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t a, int64x2_t b) {
return vaddq_s64(a, b);
}
-// CHECK-LABEL: test_vaddq_f32
-// CHECK: vadd.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vaddq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, %b
+// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t a, float32x4_t b) {
return vaddq_f32(a, b);
}
-// CHECK-LABEL: test_vaddq_u8
-// CHECK: vadd.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b
+// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t a, uint8x16_t b) {
return vaddq_u8(a, b);
}
-// CHECK-LABEL: test_vaddq_u16
-// CHECK: vadd.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t a, uint16x8_t b) {
return vaddq_u16(a, b);
}
-// CHECK-LABEL: test_vaddq_u32
-// CHECK: vadd.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b) {
return vaddq_u32(a, b);
}
-// CHECK-LABEL: test_vaddq_u64
-// CHECK: vadd.i64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b
+// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t a, uint64x2_t b) {
return vaddq_u64(a, b);
}
-// CHECK-LABEL: test_vaddhn_s16
-// CHECK: vaddhn.i16 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
return vaddhn_s16(a, b);
}
-// CHECK-LABEL: test_vaddhn_s32
-// CHECK: vaddhn.i32 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
return vaddhn_s32(a, b);
}
-// CHECK-LABEL: test_vaddhn_s64
-// CHECK: vaddhn.i64 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
+// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
return vaddhn_s64(a, b);
}
-// CHECK-LABEL: test_vaddhn_u16
-// CHECK: vaddhn.i16 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
return vaddhn_u16(a, b);
}
-// CHECK-LABEL: test_vaddhn_u32
-// CHECK: vaddhn.i32 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
return vaddhn_u32(a, b);
}
-// CHECK-LABEL: test_vaddhn_u64
-// CHECK: vaddhn.i64 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
+// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
return vaddhn_u64(a, b);
}
-// CHECK-LABEL: test_vaddl_s8
-// CHECK: vaddl.s8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
+// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
return vaddl_s8(a, b);
}
-// CHECK-LABEL: test_vaddl_s16
-// CHECK: vaddl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
return vaddl_s16(a, b);
}
-// CHECK-LABEL: test_vaddl_s32
-// CHECK: vaddl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
return vaddl_s32(a, b);
}
-// CHECK-LABEL: test_vaddl_u8
-// CHECK: vaddl.u8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
+// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
return vaddl_u8(a, b);
}
-// CHECK-LABEL: test_vaddl_u16
-// CHECK: vaddl.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
return vaddl_u16(a, b);
}
-// CHECK-LABEL: test_vaddl_u32
-// CHECK: vaddl.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
return vaddl_u32(a, b);
}
-// CHECK-LABEL: test_vaddw_s8
-// CHECK: vaddw.s8 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
return vaddw_s8(a, b);
}
-// CHECK-LABEL: test_vaddw_s16
-// CHECK: vaddw.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
return vaddw_s16(a, b);
}
-// CHECK-LABEL: test_vaddw_s32
-// CHECK: vaddw.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
return vaddw_s32(a, b);
}
-// CHECK-LABEL: test_vaddw_u8
-// CHECK: vaddw.u8 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
return vaddw_u8(a, b);
}
-// CHECK-LABEL: test_vaddw_u16
-// CHECK: vaddw.u16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
return vaddw_u16(a, b);
}
-// CHECK-LABEL: test_vaddw_u32
-// CHECK: vaddw.u32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
return vaddw_u32(a, b);
}
-// CHECK-LABEL: test_vand_s8
-// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vand_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b
+// CHECK: ret <8 x i8> [[AND_I]]
int8x8_t test_vand_s8(int8x8_t a, int8x8_t b) {
return vand_s8(a, b);
}
-// CHECK-LABEL: test_vand_s16
-// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vand_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b
+// CHECK: ret <4 x i16> [[AND_I]]
int16x4_t test_vand_s16(int16x4_t a, int16x4_t b) {
return vand_s16(a, b);
}
-// CHECK-LABEL: test_vand_s32
-// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vand_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b
+// CHECK: ret <2 x i32> [[AND_I]]
int32x2_t test_vand_s32(int32x2_t a, int32x2_t b) {
return vand_s32(a, b);
}
-// CHECK-LABEL: test_vand_s64
-// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vand_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b
+// CHECK: ret <1 x i64> [[AND_I]]
int64x1_t test_vand_s64(int64x1_t a, int64x1_t b) {
return vand_s64(a, b);
}
-// CHECK-LABEL: test_vand_u8
-// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vand_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b
+// CHECK: ret <8 x i8> [[AND_I]]
uint8x8_t test_vand_u8(uint8x8_t a, uint8x8_t b) {
return vand_u8(a, b);
}
-// CHECK-LABEL: test_vand_u16
-// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vand_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b
+// CHECK: ret <4 x i16> [[AND_I]]
uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) {
return vand_u16(a, b);
}
-// CHECK-LABEL: test_vand_u32
-// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vand_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b
+// CHECK: ret <2 x i32> [[AND_I]]
uint32x2_t test_vand_u32(uint32x2_t a, uint32x2_t b) {
return vand_u32(a, b);
}
-// CHECK-LABEL: test_vand_u64
-// CHECK: vand d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vand_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b
+// CHECK: ret <1 x i64> [[AND_I]]
uint64x1_t test_vand_u64(uint64x1_t a, uint64x1_t b) {
return vand_u64(a, b);
}
-// CHECK-LABEL: test_vandq_s8
-// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vandq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
+// CHECK: ret <16 x i8> [[AND_I]]
int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) {
return vandq_s8(a, b);
}
-// CHECK-LABEL: test_vandq_s16
-// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
+// CHECK: ret <8 x i16> [[AND_I]]
int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) {
return vandq_s16(a, b);
}
-// CHECK-LABEL: test_vandq_s32
-// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vandq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
+// CHECK: ret <4 x i32> [[AND_I]]
int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) {
return vandq_s32(a, b);
}
-// CHECK-LABEL: test_vandq_s64
-// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vandq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
+// CHECK: ret <2 x i64> [[AND_I]]
int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) {
return vandq_s64(a, b);
}
-// CHECK-LABEL: test_vandq_u8
-// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
+// CHECK: ret <16 x i8> [[AND_I]]
uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) {
return vandq_u8(a, b);
}
-// CHECK-LABEL: test_vandq_u16
-// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vandq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
+// CHECK: ret <8 x i16> [[AND_I]]
uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) {
return vandq_u16(a, b);
}
-// CHECK-LABEL: test_vandq_u32
-// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
+// CHECK: ret <4 x i32> [[AND_I]]
uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) {
return vandq_u32(a, b);
}
-// CHECK-LABEL: test_vandq_u64
-// CHECK: vand q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vandq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
+// CHECK: ret <2 x i64> [[AND_I]]
uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) {
return vandq_u64(a, b);
}
-// CHECK-LABEL: test_vbic_s8
-// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vbic_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
+// CHECK: ret <8 x i8> [[AND_I]]
int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) {
return vbic_s8(a, b);
}
-// CHECK-LABEL: test_vbic_s16
-// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vbic_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
+// CHECK: ret <4 x i16> [[AND_I]]
int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) {
return vbic_s16(a, b);
}
-// CHECK-LABEL: test_vbic_s32
-// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vbic_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
+// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
+// CHECK: ret <2 x i32> [[AND_I]]
int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) {
return vbic_s32(a, b);
}
-// CHECK-LABEL: test_vbic_s64
-// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vbic_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
+// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
+// CHECK: ret <1 x i64> [[AND_I]]
int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) {
return vbic_s64(a, b);
}
-// CHECK-LABEL: test_vbic_u8
-// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vbic_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
+// CHECK: ret <8 x i8> [[AND_I]]
uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) {
return vbic_u8(a, b);
}
-// CHECK-LABEL: test_vbic_u16
-// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vbic_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
+// CHECK: ret <4 x i16> [[AND_I]]
uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) {
return vbic_u16(a, b);
}
-// CHECK-LABEL: test_vbic_u32
-// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vbic_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
+// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
+// CHECK: ret <2 x i32> [[AND_I]]
uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) {
return vbic_u32(a, b);
}
-// CHECK-LABEL: test_vbic_u64
-// CHECK: vbic d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vbic_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
+// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
+// CHECK: ret <1 x i64> [[AND_I]]
uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) {
return vbic_u64(a, b);
}
-// CHECK-LABEL: test_vbicq_s8
-// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vbicq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
+// CHECK: ret <16 x i8> [[AND_I]]
int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) {
return vbicq_s8(a, b);
}
-// CHECK-LABEL: test_vbicq_s16
-// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
+// CHECK: ret <8 x i16> [[AND_I]]
int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) {
return vbicq_s16(a, b);
}
-// CHECK-LABEL: test_vbicq_s32
-// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vbicq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
+// CHECK: ret <4 x i32> [[AND_I]]
int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) {
return vbicq_s32(a, b);
}
-// CHECK-LABEL: test_vbicq_s64
-// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vbicq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
+// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
+// CHECK: ret <2 x i64> [[AND_I]]
int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) {
return vbicq_s64(a, b);
}
-// CHECK-LABEL: test_vbicq_u8
-// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
+// CHECK: ret <16 x i8> [[AND_I]]
uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) {
return vbicq_u8(a, b);
}
-// CHECK-LABEL: test_vbicq_u16
-// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vbicq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
+// CHECK: ret <8 x i16> [[AND_I]]
uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) {
return vbicq_u16(a, b);
}
-// CHECK-LABEL: test_vbicq_u32
-// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
+// CHECK: ret <4 x i32> [[AND_I]]
uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) {
return vbicq_u32(a, b);
}
-// CHECK-LABEL: test_vbicq_u64
-// CHECK: vbic q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vbicq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
+// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
+// CHECK: ret <2 x i64> [[AND_I]]
uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) {
return vbicq_u64(a, b);
}
-// CHECK-LABEL: test_vbsl_s8
-// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vbsl_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %a, %b
+// CHECK: [[TMP0:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %c
+// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: ret <8 x i8> [[VBSL2_I]]
int8x8_t test_vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c) {
return vbsl_s8(a, b, c);
}
-// CHECK-LABEL: test_vbsl_s16
-// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vbsl_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <4 x i16> [[VBSL5_I]]
int16x4_t test_vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c) {
return vbsl_s16(a, b, c);
}
-// CHECK-LABEL: test_vbsl_s32
-// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vbsl_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <2 x i32> [[VBSL5_I]]
int32x2_t test_vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c) {
return vbsl_s32(a, b, c);
}
-// CHECK-LABEL: test_vbsl_s64
-// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vbsl_s64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <1 x i64> [[VBSL5_I]]
int64x1_t test_vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) {
return vbsl_s64(a, b, c);
}
-// CHECK-LABEL: test_vbsl_u8
-// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vbsl_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %a, %b
+// CHECK: [[TMP0:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %c
+// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: ret <8 x i8> [[VBSL2_I]]
uint8x8_t test_vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
return vbsl_u8(a, b, c);
}
-// CHECK-LABEL: test_vbsl_u16
-// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vbsl_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <4 x i16> [[VBSL5_I]]
uint16x4_t test_vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vbsl_u16(a, b, c);
}
-// CHECK-LABEL: test_vbsl_u32
-// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vbsl_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <2 x i32> [[VBSL5_I]]
uint32x2_t test_vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vbsl_u32(a, b, c);
}
-// CHECK-LABEL: test_vbsl_u64
-// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vbsl_u64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <1 x i64> [[VBSL5_I]]
uint64x1_t test_vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c) {
return vbsl_u64(a, b, c);
}
-// CHECK-LABEL: test_vbsl_f32
-// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vbsl_f32(<2 x i32> %a, <2 x float> %b, <2 x float> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
+// CHECK: ret <2 x float> [[TMP4]]
float32x2_t test_vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) {
return vbsl_f32(a, b, c);
}
-// CHECK-LABEL: test_vbsl_p8
-// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vbsl_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %a, %b
+// CHECK: [[TMP0:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %c
+// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: ret <8 x i8> [[VBSL2_I]]
poly8x8_t test_vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c) {
return vbsl_p8(a, b, c);
}
-// CHECK-LABEL: test_vbsl_p16
-// CHECK: vbsl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vbsl_p16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <4 x i16> [[VBSL5_I]]
poly16x4_t test_vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c) {
return vbsl_p16(a, b, c);
}
-// CHECK-LABEL: test_vbslq_s8
-// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vbslq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %a, %b
+// CHECK: [[TMP0:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %c
+// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: ret <16 x i8> [[VBSL2_I]]
int8x16_t test_vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) {
return vbslq_s8(a, b, c);
}
-// CHECK-LABEL: test_vbslq_s16
-// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vbslq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <8 x i16> [[VBSL5_I]]
int16x8_t test_vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) {
return vbslq_s16(a, b, c);
}
-// CHECK-LABEL: test_vbslq_s32
-// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vbslq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <4 x i32> [[VBSL5_I]]
int32x4_t test_vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) {
return vbslq_s32(a, b, c);
}
-// CHECK-LABEL: test_vbslq_s64
-// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vbslq_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <2 x i64> [[VBSL5_I]]
int64x2_t test_vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) {
return vbslq_s64(a, b, c);
}
-// CHECK-LABEL: test_vbslq_u8
-// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vbslq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %a, %b
+// CHECK: [[TMP0:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %c
+// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: ret <16 x i8> [[VBSL2_I]]
uint8x16_t test_vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
return vbslq_u8(a, b, c);
}
-// CHECK-LABEL: test_vbslq_u16
-// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vbslq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <8 x i16> [[VBSL5_I]]
uint16x8_t test_vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
return vbslq_u16(a, b, c);
}
-// CHECK-LABEL: test_vbslq_u32
-// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vbslq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <4 x i32> [[VBSL5_I]]
uint32x4_t test_vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
return vbslq_u32(a, b, c);
}
-// CHECK-LABEL: test_vbslq_u64
-// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <2 x i64> [[VBSL5_I]]
uint64x2_t test_vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
return vbslq_u64(a, b, c);
}
-// CHECK-LABEL: test_vbslq_f32
-// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vbslq_f32(<4 x i32> %a, <4 x float> %b, <4 x float> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
+// CHECK: ret <4 x float> [[TMP4]]
float32x4_t test_vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) {
return vbslq_f32(a, b, c);
}
-// CHECK-LABEL: test_vbslq_p8
-// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vbslq_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %a, %b
+// CHECK: [[TMP0:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %c
+// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: ret <16 x i8> [[VBSL2_I]]
poly8x16_t test_vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c) {
return vbslq_p8(a, b, c);
}
-// CHECK-LABEL: test_vbslq_p16
-// CHECK: vbsl q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vbslq_p16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <8 x i16> [[VBSL5_I]]
poly16x8_t test_vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c) {
return vbslq_p16(a, b, c);
}
-// CHECK-LABEL: test_vcage_f32
-// CHECK: vacge.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcage_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCAGE_V_I]], <2 x float> [[VCAGE_V1_I]]) #4
+// CHECK: ret <2 x i32> [[VCAGE_V2_I]]
uint32x2_t test_vcage_f32(float32x2_t a, float32x2_t b) {
return vcage_f32(a, b);
}
-// CHECK-LABEL: test_vcageq_f32
-// CHECK: vacge.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcageq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCAGEQ_V_I]], <4 x float> [[VCAGEQ_V1_I]]) #4
+// CHECK: ret <4 x i32> [[VCAGEQ_V2_I]]
uint32x4_t test_vcageq_f32(float32x4_t a, float32x4_t b) {
return vcageq_f32(a, b);
}
-// CHECK-LABEL: test_vcagt_f32
-// CHECK: vacgt.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcagt_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCAGT_V_I]], <2 x float> [[VCAGT_V1_I]]) #4
+// CHECK: ret <2 x i32> [[VCAGT_V2_I]]
uint32x2_t test_vcagt_f32(float32x2_t a, float32x2_t b) {
return vcagt_f32(a, b);
}
-// CHECK-LABEL: test_vcagtq_f32
-// CHECK: vacgt.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcagtq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCAGTQ_V_I]], <4 x float> [[VCAGTQ_V1_I]]) #4
+// CHECK: ret <4 x i32> [[VCAGTQ_V2_I]]
uint32x4_t test_vcagtq_f32(float32x4_t a, float32x4_t b) {
return vcagtq_f32(a, b);
}
-// CHECK-LABEL: test_vcale_f32
-// CHECK: vacge.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcale_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCALE_V_I]], <2 x float> [[VCALE_V1_I]]) #4
+// CHECK: ret <2 x i32> [[VCALE_V2_I]]
uint32x2_t test_vcale_f32(float32x2_t a, float32x2_t b) {
return vcale_f32(a, b);
}
-// CHECK-LABEL: test_vcaleq_f32
-// CHECK: vacge.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcaleq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCALEQ_V_I]], <4 x float> [[VCALEQ_V1_I]]) #4
+// CHECK: ret <4 x i32> [[VCALEQ_V2_I]]
uint32x4_t test_vcaleq_f32(float32x4_t a, float32x4_t b) {
return vcaleq_f32(a, b);
}
-// CHECK-LABEL: test_vcalt_f32
-// CHECK: vacgt.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcalt_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCALT_V_I]], <2 x float> [[VCALT_V1_I]]) #4
+// CHECK: ret <2 x i32> [[VCALT_V2_I]]
uint32x2_t test_vcalt_f32(float32x2_t a, float32x2_t b) {
return vcalt_f32(a, b);
}
-// CHECK-LABEL: test_vcaltq_f32
-// CHECK: vacgt.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcaltq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCALTQ_V_I]], <4 x float> [[VCALTQ_V1_I]]) #4
+// CHECK: ret <4 x i32> [[VCALTQ_V2_I]]
uint32x4_t test_vcaltq_f32(float32x4_t a, float32x4_t b) {
return vcaltq_f32(a, b);
}
-// CHECK-LABEL: test_vceq_s8
-// CHECK: vceq.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vceq_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t a, int8x8_t b) {
return vceq_s8(a, b);
}
-// CHECK-LABEL: test_vceq_s16
-// CHECK: vceq.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vceq_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t a, int16x4_t b) {
return vceq_s16(a, b);
}
-// CHECK-LABEL: test_vceq_s32
-// CHECK: vceq.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vceq_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t a, int32x2_t b) {
return vceq_s32(a, b);
}
-// CHECK-LABEL: test_vceq_f32
-// CHECK: vceq.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vceq_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t a, float32x2_t b) {
return vceq_f32(a, b);
}
-// CHECK-LABEL: test_vceq_u8
-// CHECK: vceq.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vceq_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t a, uint8x8_t b) {
return vceq_u8(a, b);
}
-// CHECK-LABEL: test_vceq_u16
-// CHECK: vceq.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vceq_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t a, uint16x4_t b) {
return vceq_u16(a, b);
}
-// CHECK-LABEL: test_vceq_u32
-// CHECK: vceq.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vceq_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t a, uint32x2_t b) {
return vceq_u32(a, b);
}
-// CHECK-LABEL: test_vceq_p8
-// CHECK: vceq.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vceq_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t a, poly8x8_t b) {
return vceq_p8(a, b);
}
-// CHECK-LABEL: test_vceqq_s8
-// CHECK: vceq.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vceqq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t a, int8x16_t b) {
return vceqq_s8(a, b);
}
-// CHECK-LABEL: test_vceqq_s16
-// CHECK: vceq.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vceqq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_s16(int16x8_t a, int16x8_t b) {
return vceqq_s16(a, b);
}
-// CHECK-LABEL: test_vceqq_s32
-// CHECK: vceq.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vceqq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_s32(int32x4_t a, int32x4_t b) {
return vceqq_s32(a, b);
}
-// CHECK-LABEL: test_vceqq_f32
-// CHECK: vceq.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vceqq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_f32(float32x4_t a, float32x4_t b) {
return vceqq_f32(a, b);
}
-// CHECK-LABEL: test_vceqq_u8
-// CHECK: vceq.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vceqq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_u8(uint8x16_t a, uint8x16_t b) {
return vceqq_u8(a, b);
}
-// CHECK-LABEL: test_vceqq_u16
-// CHECK: vceq.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vceqq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_u16(uint16x8_t a, uint16x8_t b) {
return vceqq_u16(a, b);
}
-// CHECK-LABEL: test_vceqq_u32
-// CHECK: vceq.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vceqq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_u32(uint32x4_t a, uint32x4_t b) {
return vceqq_u32(a, b);
}
-// CHECK-LABEL: test_vceqq_p8
-// CHECK: vceq.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vceqq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_p8(poly8x16_t a, poly8x16_t b) {
return vceqq_p8(a, b);
}
-// CHECK-LABEL: test_vcge_s8
-// CHECK: vcge.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vcge_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_s8(int8x8_t a, int8x8_t b) {
return vcge_s8(a, b);
}
-// CHECK-LABEL: test_vcge_s16
-// CHECK: vcge.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vcge_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_s16(int16x4_t a, int16x4_t b) {
return vcge_s16(a, b);
}
-// CHECK-LABEL: test_vcge_s32
-// CHECK: vcge.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcge_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_s32(int32x2_t a, int32x2_t b) {
return vcge_s32(a, b);
}
-// CHECK-LABEL: test_vcge_f32
-// CHECK: vcge.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcge_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_f32(float32x2_t a, float32x2_t b) {
return vcge_f32(a, b);
}
-// CHECK-LABEL: test_vcge_u8
-// CHECK: vcge.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vcge_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_u8(uint8x8_t a, uint8x8_t b) {
return vcge_u8(a, b);
}
-// CHECK-LABEL: test_vcge_u16
-// CHECK: vcge.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vcge_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_u16(uint16x4_t a, uint16x4_t b) {
return vcge_u16(a, b);
}
-// CHECK-LABEL: test_vcge_u32
-// CHECK: vcge.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcge_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_u32(uint32x2_t a, uint32x2_t b) {
return vcge_u32(a, b);
}
-// CHECK-LABEL: test_vcgeq_s8
-// CHECK: vcge.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vcgeq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_s8(int8x16_t a, int8x16_t b) {
return vcgeq_s8(a, b);
}
-// CHECK-LABEL: test_vcgeq_s16
-// CHECK: vcge.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vcgeq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_s16(int16x8_t a, int16x8_t b) {
return vcgeq_s16(a, b);
}
-// CHECK-LABEL: test_vcgeq_s32
-// CHECK: vcge.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcgeq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_s32(int32x4_t a, int32x4_t b) {
return vcgeq_s32(a, b);
}
-// CHECK-LABEL: test_vcgeq_f32
-// CHECK: vcge.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcgeq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_f32(float32x4_t a, float32x4_t b) {
return vcgeq_f32(a, b);
}
-// CHECK-LABEL: test_vcgeq_u8
-// CHECK: vcge.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vcgeq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_u8(uint8x16_t a, uint8x16_t b) {
return vcgeq_u8(a, b);
}
-// CHECK-LABEL: test_vcgeq_u16
-// CHECK: vcge.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vcgeq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_u16(uint16x8_t a, uint16x8_t b) {
return vcgeq_u16(a, b);
}
-// CHECK-LABEL: test_vcgeq_u32
-// CHECK: vcge.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcgeq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_u32(uint32x4_t a, uint32x4_t b) {
return vcgeq_u32(a, b);
}
-// CHECK-LABEL: test_vcgt_s8
-// CHECK: vcgt.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vcgt_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t a, int8x8_t b) {
return vcgt_s8(a, b);
}
-// CHECK-LABEL: test_vcgt_s16
-// CHECK: vcgt.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vcgt_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t a, int16x4_t b) {
return vcgt_s16(a, b);
}
-// CHECK-LABEL: test_vcgt_s32
-// CHECK: vcgt.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcgt_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t a, int32x2_t b) {
return vcgt_s32(a, b);
}
-// CHECK-LABEL: test_vcgt_f32
-// CHECK: vcgt.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcgt_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t a, float32x2_t b) {
return vcgt_f32(a, b);
}
-// CHECK-LABEL: test_vcgt_u8
-// CHECK: vcgt.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vcgt_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t a, uint8x8_t b) {
return vcgt_u8(a, b);
}
-// CHECK-LABEL: test_vcgt_u16
-// CHECK: vcgt.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vcgt_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t a, uint16x4_t b) {
return vcgt_u16(a, b);
}
-// CHECK-LABEL: test_vcgt_u32
-// CHECK: vcgt.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcgt_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t a, uint32x2_t b) {
return vcgt_u32(a, b);
}
-// CHECK-LABEL: test_vcgtq_s8
-// CHECK: vcgt.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vcgtq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t a, int8x16_t b) {
return vcgtq_s8(a, b);
}
-// CHECK-LABEL: test_vcgtq_s16
-// CHECK: vcgt.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vcgtq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t a, int16x8_t b) {
return vcgtq_s16(a, b);
}
-// CHECK-LABEL: test_vcgtq_s32
-// CHECK: vcgt.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcgtq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t a, int32x4_t b) {
return vcgtq_s32(a, b);
}
-// CHECK-LABEL: test_vcgtq_f32
-// CHECK: vcgt.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcgtq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t a, float32x4_t b) {
return vcgtq_f32(a, b);
}
-// CHECK-LABEL: test_vcgtq_u8
-// CHECK: vcgt.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vcgtq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t a, uint8x16_t b) {
return vcgtq_u8(a, b);
}
-// CHECK-LABEL: test_vcgtq_u16
-// CHECK: vcgt.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vcgtq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t a, uint16x8_t b) {
return vcgtq_u16(a, b);
}
-// CHECK-LABEL: test_vcgtq_u32
-// CHECK: vcgt.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcgtq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t a, uint32x4_t b) {
return vcgtq_u32(a, b);
}
-// CHECK-LABEL: test_vcle_s8
-// CHECK: vcge.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vcle_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_s8(int8x8_t a, int8x8_t b) {
return vcle_s8(a, b);
}
-// CHECK-LABEL: test_vcle_s16
-// CHECK: vcge.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vcle_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t a, int16x4_t b) {
return vcle_s16(a, b);
}
-// CHECK-LABEL: test_vcle_s32
-// CHECK: vcge.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcle_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t a, int32x2_t b) {
return vcle_s32(a, b);
}
-// CHECK-LABEL: test_vcle_f32
-// CHECK: vcge.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcle_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t a, float32x2_t b) {
return vcle_f32(a, b);
}
-// CHECK-LABEL: test_vcle_u8
-// CHECK: vcge.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vcle_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t a, uint8x8_t b) {
return vcle_u8(a, b);
}
-// CHECK-LABEL: test_vcle_u16
-// CHECK: vcge.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vcle_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t a, uint16x4_t b) {
return vcle_u16(a, b);
}
-// CHECK-LABEL: test_vcle_u32
-// CHECK: vcge.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcle_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t a, uint32x2_t b) {
return vcle_u32(a, b);
}
-// CHECK-LABEL: test_vcleq_s8
-// CHECK: vcge.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vcleq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t a, int8x16_t b) {
return vcleq_s8(a, b);
}
-// CHECK-LABEL: test_vcleq_s16
-// CHECK: vcge.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vcleq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t a, int16x8_t b) {
return vcleq_s16(a, b);
}
-// CHECK-LABEL: test_vcleq_s32
-// CHECK: vcge.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcleq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t a, int32x4_t b) {
return vcleq_s32(a, b);
}
-// CHECK-LABEL: test_vcleq_f32
-// CHECK: vcge.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcleq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t a, float32x4_t b) {
return vcleq_f32(a, b);
}
-// CHECK-LABEL: test_vcleq_u8
-// CHECK: vcge.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vcleq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t a, uint8x16_t b) {
return vcleq_u8(a, b);
}
-// CHECK-LABEL: test_vcleq_u16
-// CHECK: vcge.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vcleq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t a, uint16x8_t b) {
return vcleq_u16(a, b);
}
-// CHECK-LABEL: test_vcleq_u32
-// CHECK: vcge.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcleq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t a, uint32x4_t b) {
return vcleq_u32(a, b);
}
-// CHECK-LABEL: test_vcls_s8
-// CHECK: vcls.s8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 {
+// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> %a) #4
+// CHECK: ret <8 x i8> [[VCLS_V_I]]
int8x8_t test_vcls_s8(int8x8_t a) {
return vcls_s8(a);
}
-// CHECK-LABEL: test_vcls_s16
-// CHECK: vcls.s16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> [[VCLS_V_I]]) #4
+// CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP1]]
int16x4_t test_vcls_s16(int16x4_t a) {
return vcls_s16(a);
}
-// CHECK-LABEL: test_vcls_s32
-// CHECK: vcls.s32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> [[VCLS_V_I]]) #4
+// CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vcls_s32(int32x2_t a) {
return vcls_s32(a);
}
-// CHECK-LABEL: test_vclsq_s8
-// CHECK: vcls.s8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 {
+// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> %a) #4
+// CHECK: ret <16 x i8> [[VCLSQ_V_I]]
int8x16_t test_vclsq_s8(int8x16_t a) {
return vclsq_s8(a);
}
-// CHECK-LABEL: test_vclsq_s16
-// CHECK: vcls.s16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> [[VCLSQ_V_I]]) #4
+// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP1]]
int16x8_t test_vclsq_s16(int16x8_t a) {
return vclsq_s16(a);
}
-// CHECK-LABEL: test_vclsq_s32
-// CHECK: vcls.s32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> [[VCLSQ_V_I]]) #4
+// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vclsq_s32(int32x4_t a) {
return vclsq_s32(a);
}
-// CHECK-LABEL: test_vclt_s8
-// CHECK: vcgt.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vclt_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_s8(int8x8_t a, int8x8_t b) {
return vclt_s8(a, b);
}
-// CHECK-LABEL: test_vclt_s16
-// CHECK: vcgt.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vclt_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t a, int16x4_t b) {
return vclt_s16(a, b);
}
-// CHECK-LABEL: test_vclt_s32
-// CHECK: vcgt.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vclt_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t a, int32x2_t b) {
return vclt_s32(a, b);
}
-// CHECK-LABEL: test_vclt_f32
-// CHECK: vcgt.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vclt_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t a, float32x2_t b) {
return vclt_f32(a, b);
}
-// CHECK-LABEL: test_vclt_u8
-// CHECK: vcgt.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vclt_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t a, uint8x8_t b) {
return vclt_u8(a, b);
}
-// CHECK-LABEL: test_vclt_u16
-// CHECK: vcgt.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vclt_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t a, uint16x4_t b) {
return vclt_u16(a, b);
}
-// CHECK-LABEL: test_vclt_u32
-// CHECK: vcgt.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vclt_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t a, uint32x2_t b) {
return vclt_u32(a, b);
}
-// CHECK-LABEL: test_vcltq_s8
-// CHECK: vcgt.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vcltq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t a, int8x16_t b) {
return vcltq_s8(a, b);
}
-// CHECK-LABEL: test_vcltq_s16
-// CHECK: vcgt.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vcltq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t a, int16x8_t b) {
return vcltq_s16(a, b);
}
-// CHECK-LABEL: test_vcltq_s32
-// CHECK: vcgt.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcltq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t a, int32x4_t b) {
return vcltq_s32(a, b);
}
-// CHECK-LABEL: test_vcltq_f32
-// CHECK: vcgt.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcltq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t a, float32x4_t b) {
return vcltq_f32(a, b);
}
-// CHECK-LABEL: test_vcltq_u8
-// CHECK: vcgt.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vcltq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t a, uint8x16_t b) {
return vcltq_u8(a, b);
}
-// CHECK-LABEL: test_vcltq_u16
-// CHECK: vcgt.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vcltq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t a, uint16x8_t b) {
return vcltq_u16(a, b);
}
-// CHECK-LABEL: test_vcltq_u32
-// CHECK: vcgt.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcltq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t a, uint32x4_t b) {
return vcltq_u32(a, b);
}
-// CHECK-LABEL: test_vclz_s8
-// CHECK: vclz.i8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 {
+// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4
+// CHECK: ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vclz_s8(int8x8_t a) {
return vclz_s8(a);
}
-// CHECK-LABEL: test_vclz_s16
-// CHECK: vclz.i16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4
+// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP1]]
int16x4_t test_vclz_s16(int16x4_t a) {
return vclz_s16(a);
}
-// CHECK-LABEL: test_vclz_s32
-// CHECK: vclz.i32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4
+// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vclz_s32(int32x2_t a) {
return vclz_s32(a);
}
-// CHECK-LABEL: test_vclz_u8
-// CHECK: vclz.i8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vclz_u8(<8 x i8> %a) #0 {
+// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4
+// CHECK: ret <8 x i8> [[VCLZ_V_I]]
uint8x8_t test_vclz_u8(uint8x8_t a) {
return vclz_u8(a);
}
-// CHECK-LABEL: test_vclz_u16
-// CHECK: vclz.i16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vclz_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4
+// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP1]]
uint16x4_t test_vclz_u16(uint16x4_t a) {
return vclz_u16(a);
}
-// CHECK-LABEL: test_vclz_u32
-// CHECK: vclz.i32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vclz_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4
+// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP1]]
uint32x2_t test_vclz_u32(uint32x2_t a) {
return vclz_u32(a);
}
-// CHECK-LABEL: test_vclzq_s8
-// CHECK: vclz.i8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 {
+// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4
+// CHECK: ret <16 x i8> [[VCLZQ_V_I]]
int8x16_t test_vclzq_s8(int8x16_t a) {
return vclzq_s8(a);
}
-// CHECK-LABEL: test_vclzq_s16
-// CHECK: vclz.i16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #4
+// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP1]]
int16x8_t test_vclzq_s16(int16x8_t a) {
return vclzq_s16(a);
}
-// CHECK-LABEL: test_vclzq_s32
-// CHECK: vclz.i32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #4
+// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vclzq_s32(int32x4_t a) {
return vclzq_s32(a);
}
-// CHECK-LABEL: test_vclzq_u8
-// CHECK: vclz.i8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vclzq_u8(<16 x i8> %a) #0 {
+// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4
+// CHECK: ret <16 x i8> [[VCLZQ_V_I]]
uint8x16_t test_vclzq_u8(uint8x16_t a) {
return vclzq_u8(a);
}
-// CHECK-LABEL: test_vclzq_u16
-// CHECK: vclz.i16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vclzq_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #4
+// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP1]]
uint16x8_t test_vclzq_u16(uint16x8_t a) {
return vclzq_u16(a);
}
-// CHECK-LABEL: test_vclzq_u32
-// CHECK: vclz.i32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vclzq_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #4
+// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP1]]
uint32x4_t test_vclzq_u32(uint32x4_t a) {
return vclzq_u32(a);
}
-// CHECK-LABEL: test_vcnt_u8
-// CHECK: vcnt.8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vcnt_u8(<8 x i8> %a) #0 {
+// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4
+// CHECK: ret <8 x i8> [[VCNT_V_I]]
uint8x8_t test_vcnt_u8(uint8x8_t a) {
return vcnt_u8(a);
}
-// CHECK-LABEL: test_vcnt_s8
-// CHECK: vcnt.8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 {
+// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4
+// CHECK: ret <8 x i8> [[VCNT_V_I]]
int8x8_t test_vcnt_s8(int8x8_t a) {
return vcnt_s8(a);
}
-// CHECK-LABEL: test_vcnt_p8
-// CHECK: vcnt.8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vcnt_p8(<8 x i8> %a) #0 {
+// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4
+// CHECK: ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcnt_p8(poly8x8_t a) {
return vcnt_p8(a);
}
-// CHECK-LABEL: test_vcntq_u8
-// CHECK: vcnt.8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vcntq_u8(<16 x i8> %a) #0 {
+// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4
+// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
uint8x16_t test_vcntq_u8(uint8x16_t a) {
return vcntq_u8(a);
}
-// CHECK-LABEL: test_vcntq_s8
-// CHECK: vcnt.8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 {
+// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4
+// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
int8x16_t test_vcntq_s8(int8x16_t a) {
return vcntq_s8(a);
}
-// CHECK-LABEL: test_vcntq_p8
-// CHECK: vcnt.8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vcntq_p8(<16 x i8> %a) #0 {
+// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4
+// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
poly8x16_t test_vcntq_p8(poly8x16_t a) {
return vcntq_p8(a);
}
-// CHECK-LABEL: test_vcombine_s8
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vcombine_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vcombine_s8(int8x8_t a, int8x8_t b) {
return vcombine_s8(a, b);
}
-// CHECK-LABEL: test_vcombine_s16
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vcombine_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vcombine_s16(int16x4_t a, int16x4_t b) {
return vcombine_s16(a, b);
}
-// CHECK-LABEL: test_vcombine_s32
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcombine_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vcombine_s32(int32x2_t a, int32x2_t b) {
return vcombine_s32(a, b);
}
-// CHECK-LABEL: test_vcombine_s64
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vcombine_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
+// CHECK: ret <2 x i64> [[SHUFFLE_I]]
int64x2_t test_vcombine_s64(int64x1_t a, int64x1_t b) {
return vcombine_s64(a, b);
}
-// CHECK-LABEL: test_vcombine_f16
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
+// CHECK-LABEL: define <8 x half> @test_vcombine_f16(<4 x half> %a, <4 x half> %b) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x half> [[SHUFFLE_I]]
float16x8_t test_vcombine_f16(float16x4_t a, float16x4_t b) {
return vcombine_f16(a, b);
}
-// CHECK-LABEL: test_vcombine_f32
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vcombine_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vcombine_f32(float32x2_t a, float32x2_t b) {
return vcombine_f32(a, b);
}
-// CHECK-LABEL: test_vcombine_u8
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vcombine_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vcombine_u8(uint8x8_t a, uint8x8_t b) {
return vcombine_u8(a, b);
}
-// CHECK-LABEL: test_vcombine_u16
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vcombine_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vcombine_u16(uint16x4_t a, uint16x4_t b) {
return vcombine_u16(a, b);
}
-// CHECK-LABEL: test_vcombine_u32
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcombine_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vcombine_u32(uint32x2_t a, uint32x2_t b) {
return vcombine_u32(a, b);
}
-// CHECK-LABEL: test_vcombine_u64
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vcombine_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
+// CHECK: ret <2 x i64> [[SHUFFLE_I]]
uint64x2_t test_vcombine_u64(uint64x1_t a, uint64x1_t b) {
return vcombine_u64(a, b);
}
-// CHECK-LABEL: test_vcombine_p8
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vcombine_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vcombine_p8(poly8x8_t a, poly8x8_t b) {
return vcombine_p8(a, b);
}
-// CHECK-LABEL: test_vcombine_p16
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
-// CHECK: vmov d{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vcombine_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vcombine_p16(poly16x4_t a, poly16x4_t b) {
return vcombine_p16(a, b);
}
-// CHECK-LABEL: test_vcreate_s8
-// CHECK: vmov [[REG:d[0-9]+]], r0, r1
-// CHECK: vclz.i8 d{{[0-9]+}}, [[REG]]
+// CHECK-LABEL: define <8 x i8> @test_vcreate_s8(i64 %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
+// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) #4
+// CHECK: ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vcreate_s8(uint64_t a) {
return vclz_s8(vcreate_s8(a));
}
-// CHECK-LABEL: test_vcreate_s16
-// CHECK: vmov [[REG:d[0-9]+]], r0, r1
-// CHECK: vclz.i16 d{{[0-9]+}}, [[REG]]
+// CHECK-LABEL: define <4 x i16> @test_vcreate_s16(i64 %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4
+// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vcreate_s16(uint64_t a) {
return vclz_s16(vcreate_s16(a));
}
-// CHECK-LABEL: test_vcreate_s32
-// CHECK: vmov [[REG:d[0-9]+]], r0, r1
-// CHECK: vclz.i32 d{{[0-9]+}}, [[REG]]
+// CHECK-LABEL: define <2 x i32> @test_vcreate_s32(i64 %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4
+// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vcreate_s32(uint64_t a) {
return vclz_s32(vcreate_s32(a));
}
-// CHECK-LABEL: test_vcreate_f16
+// CHECK-LABEL: define <4 x half> @test_vcreate_f16(i64 %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x half>
+// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vcreate_f16(uint64_t a) {
return vcreate_f16(a);
}
-// CHECK-LABEL: test_vcreate_f32
+// CHECK-LABEL: define <2 x float> @test_vcreate_f32(i64 %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x float>
+// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vcreate_f32(uint64_t a) {
return vcreate_f32(a);
}
-// CHECK-LABEL: test_vcreate_u8
-// CHECK: vmov [[REG:d[0-9]+]], r0, r1
-// CHECK: vclz.i8 d{{[0-9]+}}, [[REG]]
+// CHECK-LABEL: define <8 x i8> @test_vcreate_u8(i64 %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
+// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) #4
+// CHECK: ret <8 x i8> [[VCLZ_V_I]]
uint8x8_t test_vcreate_u8(uint64_t a) {
return vclz_s8(vcreate_u8(a));
}
-// CHECK-LABEL: test_vcreate_u16
-// CHECK: vmov [[REG:d[0-9]+]], r0, r1
-// CHECK: vclz.i16 d{{[0-9]+}}, [[REG]]
+// CHECK-LABEL: define <4 x i16> @test_vcreate_u16(i64 %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4
+// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vcreate_u16(uint64_t a) {
return vclz_s16(vcreate_u16(a));
}
-// CHECK-LABEL: test_vcreate_u32
-// CHECK: vmov [[REG:d[0-9]+]], r0, r1
-// CHECK: vclz.i32 d{{[0-9]+}}, [[REG]]
+// CHECK-LABEL: define <2 x i32> @test_vcreate_u32(i64 %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4
+// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vcreate_u32(uint64_t a) {
return vclz_s32(vcreate_u32(a));
}
@@ -1614,10145 +2449,21932 @@ uint32x2_t test_vcreate_u32(uint64_t a)
// We have two ways of lowering that. Either with one 'vmov d, r, r' or
// with two 'vmov d[],r'. LLVM does the latter. We may want to be less
// strict about the matching pattern if it starts causing problem.
-// CHECK-LABEL: test_vcreate_u64
-// CHECK: vmov.32 [[REG:d[0-9]+]][0], r0
-// CHECK: vmov.32 [[REG]][1], r1
+// CHECK-LABEL: define <1 x i64> @test_vcreate_u64(i64 %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
+// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
+// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vcreate_u64(uint64_t a) {
uint64x1_t tmp = vcreate_u64(a);
return vadd_u64(tmp, tmp);
}
-// CHECK-LABEL: test_vcreate_p8
-// CHECK: vmov [[REG:d[0-9]+]], r0, r1
-// CHECK: vcnt.8 d{{[0-9]+}}, [[REG]]
+// CHECK-LABEL: define <8 x i8> @test_vcreate_p8(i64 %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
+// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[TMP0]]) #4
+// CHECK: ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcreate_p8(uint64_t a) {
return vcnt_p8(vcreate_p8(a));
}
-// CHECK-LABEL: test_vcreate_p16
-// CHECK: vmov [[REG:d[0-9]+]], r0, r1
+// CHECK-LABEL: define <4 x i16> @test_vcreate_p16(i64 %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP4:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP4]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <4 x i16> [[VBSL5_I]]
poly16x4_t test_vcreate_p16(uint64_t a) {
poly16x4_t tmp = vcreate_p16(a);
return vbsl_p16(tmp, tmp, tmp);
}
-// CHECK-LABEL: test_vcreate_s64
-// CHECK: vmov.32 [[REG:d[0-9]+]][0], r0
-// CHECK: vmov.32 [[REG]][1], r1
+// CHECK-LABEL: define <1 x i64> @test_vcreate_s64(i64 %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
+// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
+// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vcreate_s64(uint64_t a) {
int64x1_t tmp = vcreate_s64(a);
return vadd_s64(tmp, tmp);
}
-// CHECK-LABEL: test_vcvt_f16_f32
-// CHECK: vcvt.f16.f32 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x half> @test_vcvt_f16_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]]) #4
+// CHECK: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half>
+// CHECK: ret <4 x half> [[TMP1]]
float16x4_t test_vcvt_f16_f32(float32x4_t a) {
return vcvt_f16_f32(a);
}
-// CHECK-LABEL: test_vcvt_f32_s32
-// CHECK: vcvt.f32.s32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+// CHECK: ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_s32(int32x2_t a) {
return vcvt_f32_s32(a);
}
-// CHECK-LABEL: test_vcvt_f32_u32
-// CHECK: vcvt.f32.u32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x float>
+// CHECK: ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_u32(uint32x2_t a) {
return vcvt_f32_u32(a);
}
-// CHECK-LABEL: test_vcvtq_f32_s32
-// CHECK: vcvt.f32.s32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VCVT_I:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
+// CHECK: ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_s32(int32x4_t a) {
return vcvtq_f32_s32(a);
}
-// CHECK-LABEL: test_vcvtq_f32_u32
-// CHECK: vcvt.f32.u32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VCVT_I:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
+// CHECK: ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
return vcvtq_f32_u32(a);
}
-// CHECK-LABEL: test_vcvt_f32_f16
-// CHECK: vcvt.f32.f16
+// CHECK-LABEL: define <4 x float> @test_vcvt_f32_f16(<4 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]]) #4
+// CHECK: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I]] to <4 x float>
+// CHECK: ret <4 x float> [[TMP1]]
float32x4_t test_vcvt_f32_f16(float16x4_t a) {
return vcvt_f32_f16(a);
}
-// CHECK-LABEL: test_vcvt_n_f32_s32
-// CHECK: vcvt.f32.s32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
+// CHECK: ret <2 x float> [[VCVT_N]]1
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
return vcvt_n_f32_s32(a, 1);
}
-// CHECK-LABEL: test_vcvt_n_f32_u32
-// CHECK: vcvt.f32.u32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
+// CHECK: ret <2 x float> [[VCVT_N]]1
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
return vcvt_n_f32_u32(a, 1);
}
-// CHECK-LABEL: test_vcvtq_n_f32_s32
-// CHECK: vcvt.f32.s32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
+// CHECK: ret <4 x float> [[VCVT_N]]1
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
return vcvtq_n_f32_s32(a, 3);
}
-// CHECK-LABEL: test_vcvtq_n_f32_u32
-// CHECK: vcvt.f32.u32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
+// CHECK: ret <4 x float> [[VCVT_N]]1
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
return vcvtq_n_f32_u32(a, 3);
}
-// CHECK-LABEL: test_vcvt_n_s32_f32
-// CHECK: vcvt.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
+// CHECK: ret <2 x i32> [[VCVT_N]]1
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
return vcvt_n_s32_f32(a, 1);
}
-// CHECK-LABEL: test_vcvtq_n_s32_f32
-// CHECK: vcvt.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
+// CHECK: ret <4 x i32> [[VCVT_N]]1
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
return vcvtq_n_s32_f32(a, 3);
}
-// CHECK-LABEL: test_vcvt_n_u32_f32
-// CHECK: vcvt.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
+// CHECK: ret <2 x i32> [[VCVT_N]]1
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
return vcvt_n_u32_f32(a, 1);
}
-// CHECK-LABEL: test_vcvtq_n_u32_f32
-// CHECK: vcvt.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
+// CHECK: ret <4 x i32> [[VCVT_N]]1
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
return vcvtq_n_u32_f32(a, 3);
}
-// CHECK-LABEL: test_vcvt_s32_f32
-// CHECK: vcvt.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[TMP2:%.*]] = fptosi <2 x float> [[TMP1]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vcvt_s32_f32(float32x2_t a) {
return vcvt_s32_f32(a);
}
-// CHECK-LABEL: test_vcvtq_s32_f32
-// CHECK: vcvt.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[TMP2:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
return vcvtq_s32_f32(a);
}
-// CHECK-LABEL: test_vcvt_u32_f32
-// CHECK: vcvt.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[TMP2:%.*]] = fptoui <2 x float> [[TMP1]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
return vcvt_u32_f32(a);
}
-// CHECK-LABEL: test_vcvtq_u32_f32
-// CHECK: vcvt.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[TMP2:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
return vcvtq_u32_f32(a);
}
-// CHECK-LABEL: test_vdup_lane_u8
-// CHECK: vdup.8 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <8 x i8> @test_vdup_lane_u8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: ret <8 x i8> [[SHUFFLE]]
uint8x8_t test_vdup_lane_u8(uint8x8_t a) {
return vdup_lane_u8(a, 7);
}
-// CHECK-LABEL: test_vdup_lane_u16
-// CHECK: vdup.16 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i16> @test_vdup_lane_u16(<4 x i16> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: ret <4 x i16> [[SHUFFLE]]
uint16x4_t test_vdup_lane_u16(uint16x4_t a) {
return vdup_lane_u16(a, 3);
}
-// CHECK-LABEL: test_vdup_lane_u32
-// CHECK: vdup.32 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i32> @test_vdup_lane_u32(<2 x i32> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 1>
+// CHECK: ret <2 x i32> [[SHUFFLE]]
uint32x2_t test_vdup_lane_u32(uint32x2_t a) {
return vdup_lane_u32(a, 1);
}
-// CHECK-LABEL: test_vdup_lane_s8
-// CHECK: vdup.8 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <8 x i8> @test_vdup_lane_s8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: ret <8 x i8> [[SHUFFLE]]
int8x8_t test_vdup_lane_s8(int8x8_t a) {
return vdup_lane_s8(a, 7);
}
-// CHECK-LABEL: test_vdup_lane_s16
-// CHECK: vdup.16 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i16> @test_vdup_lane_s16(<4 x i16> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: ret <4 x i16> [[SHUFFLE]]
int16x4_t test_vdup_lane_s16(int16x4_t a) {
return vdup_lane_s16(a, 3);
}
-// CHECK-LABEL: test_vdup_lane_s32
-// CHECK: vdup.32 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i32> @test_vdup_lane_s32(<2 x i32> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 1>
+// CHECK: ret <2 x i32> [[SHUFFLE]]
int32x2_t test_vdup_lane_s32(int32x2_t a) {
return vdup_lane_s32(a, 1);
}
-// CHECK-LABEL: test_vdup_lane_p8
-// CHECK: vdup.8 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <8 x i8> @test_vdup_lane_p8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: ret <8 x i8> [[SHUFFLE]]
poly8x8_t test_vdup_lane_p8(poly8x8_t a) {
return vdup_lane_p8(a, 7);
}
-// CHECK-LABEL: test_vdup_lane_p16
-// CHECK: vdup.16 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i16> @test_vdup_lane_p16(<4 x i16> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: ret <4 x i16> [[SHUFFLE]]
poly16x4_t test_vdup_lane_p16(poly16x4_t a) {
return vdup_lane_p16(a, 3);
}
-// CHECK-LABEL: test_vdup_lane_f32
-// CHECK: vdup.32 d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x float> @test_vdup_lane_f32(<2 x float> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 1>
+// CHECK: ret <2 x float> [[SHUFFLE]]
float32x2_t test_vdup_lane_f32(float32x2_t a) {
return vdup_lane_f32(a, 1);
}
-// CHECK-LABEL: test_vdupq_lane_u8
-// CHECK: vdup.8 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_u8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: ret <16 x i8> [[SHUFFLE]]
uint8x16_t test_vdupq_lane_u8(uint8x8_t a) {
return vdupq_lane_u8(a, 7);
}
-// CHECK-LABEL: test_vdupq_lane_u16
-// CHECK: vdup.16 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_u16(<4 x i16> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: ret <8 x i16> [[SHUFFLE]]
uint16x8_t test_vdupq_lane_u16(uint16x4_t a) {
return vdupq_lane_u16(a, 3);
}
-// CHECK-LABEL: test_vdupq_lane_u32
-// CHECK: vdup.32 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vdupq_lane_u32(<2 x i32> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: ret <4 x i32> [[SHUFFLE]]
uint32x4_t test_vdupq_lane_u32(uint32x2_t a) {
return vdupq_lane_u32(a, 1);
}
-// CHECK-LABEL: test_vdupq_lane_s8
-// CHECK: vdup.8 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: ret <16 x i8> [[SHUFFLE]]
int8x16_t test_vdupq_lane_s8(int8x8_t a) {
return vdupq_lane_s8(a, 7);
}
-// CHECK-LABEL: test_vdupq_lane_s16
-// CHECK: vdup.16 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: ret <8 x i16> [[SHUFFLE]]
int16x8_t test_vdupq_lane_s16(int16x4_t a) {
return vdupq_lane_s16(a, 3);
}
-// CHECK-LABEL: test_vdupq_lane_s32
-// CHECK: vdup.32 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: ret <4 x i32> [[SHUFFLE]]
int32x4_t test_vdupq_lane_s32(int32x2_t a) {
return vdupq_lane_s32(a, 1);
}
-// CHECK-LABEL: test_vdupq_lane_p8
-// CHECK: vdup.8 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_p8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK: ret <16 x i8> [[SHUFFLE]]
poly8x16_t test_vdupq_lane_p8(poly8x8_t a) {
return vdupq_lane_p8(a, 7);
}
-// CHECK-LABEL: test_vdupq_lane_p16
-// CHECK: vdup.16 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_p16(<4 x i16> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: ret <8 x i16> [[SHUFFLE]]
poly16x8_t test_vdupq_lane_p16(poly16x4_t a) {
return vdupq_lane_p16(a, 3);
}
-// CHECK-LABEL: test_vdupq_lane_f32
-// CHECK: vdup.32 q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x float> @test_vdupq_lane_f32(<2 x float> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: ret <4 x float> [[SHUFFLE]]
float32x4_t test_vdupq_lane_f32(float32x2_t a) {
return vdupq_lane_f32(a, 1);
}
-// CHECK-LABEL: test_vdup_lane_s64
+// CHECK-LABEL: define <1 x i64> @test_vdup_lane_s64(<1 x i64> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <1 x i32> zeroinitializer
+// CHECK: ret <1 x i64> [[SHUFFLE]]
int64x1_t test_vdup_lane_s64(int64x1_t a) {
return vdup_lane_s64(a, 0);
}
-// CHECK-LABEL: test_vdup_lane_u64
+// CHECK-LABEL: define <1 x i64> @test_vdup_lane_u64(<1 x i64> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <1 x i32> zeroinitializer
+// CHECK: ret <1 x i64> [[SHUFFLE]]
uint64x1_t test_vdup_lane_u64(uint64x1_t a) {
return vdup_lane_u64(a, 0);
}
-// CHECK-LABEL: test_vdupq_lane_s64
-// CHECK: {{vmov|vdup}}
+// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <2 x i32> zeroinitializer
+// CHECK: ret <2 x i64> [[SHUFFLE]]
int64x2_t test_vdupq_lane_s64(int64x1_t a) {
return vdupq_lane_s64(a, 0);
}
-// CHECK-LABEL: test_vdupq_lane_u64
-// CHECK: {{vmov|vdup}}
+// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_u64(<1 x i64> %a) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <2 x i32> zeroinitializer
+// CHECK: ret <2 x i64> [[SHUFFLE]]
uint64x2_t test_vdupq_lane_u64(uint64x1_t a) {
return vdupq_lane_u64(a, 0);
}
-// CHECK-LABEL: test_vdup_n_u8
-// CHECK: vmov
+// CHECK-LABEL: define <8 x i8> @test_vdup_n_u8(i8 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
+// CHECK: ret <8 x i8> [[VECINIT7_I]]
uint8x8_t test_vdup_n_u8(uint8_t a) {
return vdup_n_u8(a);
}
-// CHECK-LABEL: test_vdup_n_u16
-// CHECK: vmov
+// CHECK-LABEL: define <4 x i16> @test_vdup_n_u16(i16 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
+// CHECK: ret <4 x i16> [[VECINIT3_I]]
uint16x4_t test_vdup_n_u16(uint16_t a) {
return vdup_n_u16(a);
}
-// CHECK-LABEL: test_vdup_n_u32
-// CHECK: mov
+// CHECK-LABEL: define <2 x i32> @test_vdup_n_u32(i32 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
+// CHECK: ret <2 x i32> [[VECINIT1_I]]
uint32x2_t test_vdup_n_u32(uint32_t a) {
return vdup_n_u32(a);
}
-// CHECK-LABEL: test_vdup_n_s8
-// CHECK: vmov
+// CHECK-LABEL: define <8 x i8> @test_vdup_n_s8(i8 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
+// CHECK: ret <8 x i8> [[VECINIT7_I]]
int8x8_t test_vdup_n_s8(int8_t a) {
return vdup_n_s8(a);
}
-// CHECK-LABEL: test_vdup_n_s16
-// CHECK: vmov
+// CHECK-LABEL: define <4 x i16> @test_vdup_n_s16(i16 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
+// CHECK: ret <4 x i16> [[VECINIT3_I]]
int16x4_t test_vdup_n_s16(int16_t a) {
return vdup_n_s16(a);
}
-// CHECK-LABEL: test_vdup_n_s32
-// CHECK: mov
+// CHECK-LABEL: define <2 x i32> @test_vdup_n_s32(i32 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
+// CHECK: ret <2 x i32> [[VECINIT1_I]]
int32x2_t test_vdup_n_s32(int32_t a) {
return vdup_n_s32(a);
}
-// CHECK-LABEL: test_vdup_n_p8
-// CHECK: vmov
+// CHECK-LABEL: define <8 x i8> @test_vdup_n_p8(i8 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
+// CHECK: ret <8 x i8> [[VECINIT7_I]]
poly8x8_t test_vdup_n_p8(poly8_t a) {
return vdup_n_p8(a);
}
-// CHECK-LABEL: test_vdup_n_p16
-// CHECK: vmov
+// CHECK-LABEL: define <4 x i16> @test_vdup_n_p16(i16 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
+// CHECK: ret <4 x i16> [[VECINIT3_I]]
poly16x4_t test_vdup_n_p16(poly16_t a) {
return vdup_n_p16(a);
}
-// CHECK-LABEL: test_vdup_n_f16
-// CHECK: vld1.16 {{{d[0-9]+\[\]}}}
+// CHECK-LABEL: define <4 x half> @test_vdup_n_f16(half* %a) #0 {
+// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
+// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
+// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
+// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT]]1, half [[TMP0]], i32 2
+// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT]]2, half [[TMP0]], i32 3
+// CHECK: ret <4 x half> [[VECINIT]]3
float16x4_t test_vdup_n_f16(float16_t *a) {
return vdup_n_f16(*a);
}
-// CHECK-LABEL: test_vdup_n_f32
-// CHECK: mov
+// CHECK-LABEL: define <2 x float> @test_vdup_n_f32(float %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
+// CHECK: ret <2 x float> [[VECINIT1_I]]
float32x2_t test_vdup_n_f32(float32_t a) {
return vdup_n_f32(a);
}
-// CHECK-LABEL: test_vdupq_n_u8
-// CHECK: vmov
+// CHECK-LABEL: define <16 x i8> @test_vdupq_n_u8(i8 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
+// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
+// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
+// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
+// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
+// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
+// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
+// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
+// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
+// CHECK: ret <16 x i8> [[VECINIT15_I]]
uint8x16_t test_vdupq_n_u8(uint8_t a) {
return vdupq_n_u8(a);
}
-// CHECK-LABEL: test_vdupq_n_u16
-// CHECK: vmov
+// CHECK-LABEL: define <8 x i16> @test_vdupq_n_u16(i16 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
+// CHECK: ret <8 x i16> [[VECINIT7_I]]
uint16x8_t test_vdupq_n_u16(uint16_t a) {
return vdupq_n_u16(a);
}
-// CHECK-LABEL: test_vdupq_n_u32
-// CHECK: vmov
+// CHECK-LABEL: define <4 x i32> @test_vdupq_n_u32(i32 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
+// CHECK: ret <4 x i32> [[VECINIT3_I]]
uint32x4_t test_vdupq_n_u32(uint32_t a) {
return vdupq_n_u32(a);
}
-// CHECK-LABEL: test_vdupq_n_s8
-// CHECK: vmov
+// CHECK-LABEL: define <16 x i8> @test_vdupq_n_s8(i8 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
+// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
+// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
+// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
+// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
+// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
+// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
+// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
+// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
+// CHECK: ret <16 x i8> [[VECINIT15_I]]
int8x16_t test_vdupq_n_s8(int8_t a) {
return vdupq_n_s8(a);
}
-// CHECK-LABEL: test_vdupq_n_s16
-// CHECK: vmov
+// CHECK-LABEL: define <8 x i16> @test_vdupq_n_s16(i16 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
+// CHECK: ret <8 x i16> [[VECINIT7_I]]
int16x8_t test_vdupq_n_s16(int16_t a) {
return vdupq_n_s16(a);
}
-// CHECK-LABEL: test_vdupq_n_s32
-// CHECK: vmov
+// CHECK-LABEL: define <4 x i32> @test_vdupq_n_s32(i32 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
+// CHECK: ret <4 x i32> [[VECINIT3_I]]
int32x4_t test_vdupq_n_s32(int32_t a) {
return vdupq_n_s32(a);
}
-// CHECK-LABEL: test_vdupq_n_p8
-// CHECK: vmov
+// CHECK-LABEL: define <16 x i8> @test_vdupq_n_p8(i8 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
+// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
+// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
+// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
+// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
+// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
+// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
+// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
+// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
+// CHECK: ret <16 x i8> [[VECINIT15_I]]
poly8x16_t test_vdupq_n_p8(poly8_t a) {
return vdupq_n_p8(a);
}
-// CHECK-LABEL: test_vdupq_n_p16
-// CHECK: vmov
+// CHECK-LABEL: define <8 x i16> @test_vdupq_n_p16(i16 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
+// CHECK: ret <8 x i16> [[VECINIT7_I]]
poly16x8_t test_vdupq_n_p16(poly16_t a) {
return vdupq_n_p16(a);
}
-// CHECK-LABEL: test_vdupq_n_f16
-// CHECK: vld1.16 {{{d[0-9]+\[\], d[0-9]+\[\]}}}
+// CHECK-LABEL: define <8 x half> @test_vdupq_n_f16(half* %a) #0 {
+// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
+// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
+// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
+// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT]]1, half [[TMP0]], i32 2
+// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT]]2, half [[TMP0]], i32 3
+// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT]]3, half [[TMP0]], i32 4
+// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT]]4, half [[TMP0]], i32 5
+// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT]]5, half [[TMP0]], i32 6
+// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT]]6, half [[TMP0]], i32 7
+// CHECK: ret <8 x half> [[VECINIT]]7
float16x8_t test_vdupq_n_f16(float16_t *a) {
return vdupq_n_f16(*a);
}
-// CHECK-LABEL: test_vdupq_n_f32
-// CHECK: vmov
+// CHECK-LABEL: define <4 x float> @test_vdupq_n_f32(float %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
+// CHECK: ret <4 x float> [[VECINIT3_I]]
float32x4_t test_vdupq_n_f32(float32_t a) {
return vdupq_n_f32(a);
}
-// CHECK-LABEL: test_vdup_n_s64
-// CHECK: vmov
+// CHECK-LABEL: define <1 x i64> @test_vdup_n_s64(i64 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
+// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
+// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vdup_n_s64(int64_t a) {
int64x1_t tmp = vdup_n_s64(a);
return vadd_s64(tmp, tmp);
}
-// CHECK-LABEL: test_vdup_n_u64
-// CHECK: vmov
+// CHECK-LABEL: define <1 x i64> @test_vdup_n_u64(i64 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
+// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
+// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vdup_n_u64(uint64_t a) {
int64x1_t tmp = vdup_n_u64(a);
return vadd_s64(tmp, tmp);
}
-// CHECK-LABEL: test_vdupq_n_s64
-// CHECK: vmov
+// CHECK-LABEL: define <2 x i64> @test_vdupq_n_s64(i64 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vdupq_n_s64(int64_t a) {
int64x2_t tmp = vdupq_n_s64(a);
return vaddq_s64(tmp, tmp);
}
-// CHECK-LABEL: test_vdupq_n_u64
-// CHECK: vmov
+// CHECK-LABEL: define <2 x i64> @test_vdupq_n_u64(i64 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vdupq_n_u64(uint64_t a) {
int64x2_t tmp = vdupq_n_u64(a);
return vaddq_u64(tmp, tmp);
}
-// CHECK-LABEL: test_veor_s8
-// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_veor_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b
+// CHECK: ret <8 x i8> [[XOR_I]]
int8x8_t test_veor_s8(int8x8_t a, int8x8_t b) {
return veor_s8(a, b);
}
-// CHECK-LABEL: test_veor_s16
-// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_veor_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b
+// CHECK: ret <4 x i16> [[XOR_I]]
int16x4_t test_veor_s16(int16x4_t a, int16x4_t b) {
return veor_s16(a, b);
}
-// CHECK-LABEL: test_veor_s32
-// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_veor_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b
+// CHECK: ret <2 x i32> [[XOR_I]]
int32x2_t test_veor_s32(int32x2_t a, int32x2_t b) {
return veor_s32(a, b);
}
-// CHECK-LABEL: test_veor_s64
-// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_veor_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b
+// CHECK: ret <1 x i64> [[XOR_I]]
int64x1_t test_veor_s64(int64x1_t a, int64x1_t b) {
return veor_s64(a, b);
}
-// CHECK-LABEL: test_veor_u8
-// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_veor_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b
+// CHECK: ret <8 x i8> [[XOR_I]]
uint8x8_t test_veor_u8(uint8x8_t a, uint8x8_t b) {
return veor_u8(a, b);
}
-// CHECK-LABEL: test_veor_u16
-// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_veor_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b
+// CHECK: ret <4 x i16> [[XOR_I]]
uint16x4_t test_veor_u16(uint16x4_t a, uint16x4_t b) {
return veor_u16(a, b);
}
-// CHECK-LABEL: test_veor_u32
-// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_veor_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b
+// CHECK: ret <2 x i32> [[XOR_I]]
uint32x2_t test_veor_u32(uint32x2_t a, uint32x2_t b) {
return veor_u32(a, b);
}
-// CHECK-LABEL: test_veor_u64
-// CHECK: veor d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_veor_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b
+// CHECK: ret <1 x i64> [[XOR_I]]
uint64x1_t test_veor_u64(uint64x1_t a, uint64x1_t b) {
return veor_u64(a, b);
}
-// CHECK-LABEL: test_veorq_s8
-// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_veorq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
+// CHECK: ret <16 x i8> [[XOR_I]]
int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) {
return veorq_s8(a, b);
}
-// CHECK-LABEL: test_veorq_s16
-// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
+// CHECK: ret <8 x i16> [[XOR_I]]
int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) {
return veorq_s16(a, b);
}
-// CHECK-LABEL: test_veorq_s32
-// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_veorq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
+// CHECK: ret <4 x i32> [[XOR_I]]
int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) {
return veorq_s32(a, b);
}
-// CHECK-LABEL: test_veorq_s64
-// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_veorq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
+// CHECK: ret <2 x i64> [[XOR_I]]
int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) {
return veorq_s64(a, b);
}
-// CHECK-LABEL: test_veorq_u8
-// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
+// CHECK: ret <16 x i8> [[XOR_I]]
uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) {
return veorq_u8(a, b);
}
-// CHECK-LABEL: test_veorq_u16
-// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_veorq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
+// CHECK: ret <8 x i16> [[XOR_I]]
uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) {
return veorq_u16(a, b);
}
-// CHECK-LABEL: test_veorq_u32
-// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
+// CHECK: ret <4 x i32> [[XOR_I]]
uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) {
return veorq_u32(a, b);
}
-// CHECK-LABEL: test_veorq_u64
-// CHECK: veor q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_veorq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
+// CHECK: ret <2 x i64> [[XOR_I]]
uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) {
return veorq_u64(a, b);
}
-// CHECK-LABEL: test_vext_s8
-// CHECK: vext.8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+// CHECK: ret <8 x i8> [[VEXT]]
int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) {
return vext_s8(a, b, 7);
}
-// CHECK-LABEL: test_vext_u8
-// CHECK: vext.8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vext_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+// CHECK: ret <8 x i8> [[VEXT]]
uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) {
return vext_u8(a, b, 7);
}
-// CHECK-LABEL: test_vext_p8
-// CHECK: vext.8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vext_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+// CHECK: ret <8 x i8> [[VEXT]]
poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) {
return vext_p8(a, b, 7);
}
-// CHECK-LABEL: test_vext_s16
-// CHECK: vext.16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vext_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK: ret <4 x i16> [[VEXT]]
int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) {
return vext_s16(a, b, 3);
}
-// CHECK-LABEL: test_vext_u16
-// CHECK: vext.16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vext_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK: ret <4 x i16> [[VEXT]]
uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) {
return vext_u16(a, b, 3);
}
-// CHECK-LABEL: test_vext_p16
-// CHECK: vext.16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vext_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK: ret <4 x i16> [[VEXT]]
poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) {
return vext_p16(a, b, 3);
}
-// CHECK-LABEL: test_vext_s32
-// CHECK: vext.32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vext_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
+// CHECK: ret <2 x i32> [[VEXT]]
int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) {
return vext_s32(a, b, 1);
}
-// CHECK-LABEL: test_vext_u32
-// CHECK: vext.32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vext_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
+// CHECK: ret <2 x i32> [[VEXT]]
uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) {
return vext_u32(a, b, 1);
}
-// CHECK-LABEL: test_vext_s64
+// CHECK-LABEL: define <1 x i64> @test_vext_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
+// CHECK: ret <1 x i64> [[VEXT]]
int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) {
return vext_s64(a, b, 0);
}
-// CHECK-LABEL: test_vext_u64
+// CHECK-LABEL: define <1 x i64> @test_vext_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
+// CHECK: ret <1 x i64> [[VEXT]]
uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) {
return vext_u64(a, b, 0);
}
-// CHECK-LABEL: test_vext_f32
-// CHECK: vext.32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vext_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VEXT:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 2>
+// CHECK: ret <2 x float> [[VEXT]]
float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) {
return vext_f32(a, b, 1);
}
-// CHECK-LABEL: test_vextq_s8
-// CHECK: vext.8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vextq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+// CHECK: ret <16 x i8> [[VEXT]]
int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) {
return vextq_s8(a, b, 15);
}
-// CHECK-LABEL: test_vextq_u8
-// CHECK: vext.8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vextq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+// CHECK: ret <16 x i8> [[VEXT]]
uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) {
return vextq_u8(a, b, 15);
}
-// CHECK-LABEL: test_vextq_p8
-// CHECK: vext.8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vextq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+// CHECK: ret <16 x i8> [[VEXT]]
poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) {
return vextq_p8(a, b, 15);
}
-// CHECK-LABEL: test_vextq_s16
-// CHECK: vext.16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vextq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+// CHECK: ret <8 x i16> [[VEXT]]
int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) {
return vextq_s16(a, b, 7);
}
-// CHECK-LABEL: test_vextq_u16
-// CHECK: vext.16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vextq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+// CHECK: ret <8 x i16> [[VEXT]]
uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) {
return vextq_u16(a, b, 7);
}
-// CHECK-LABEL: test_vextq_p16
-// CHECK: vext.16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vextq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+// CHECK: ret <8 x i16> [[VEXT]]
poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) {
return vextq_p16(a, b, 7);
}
-// CHECK-LABEL: test_vextq_s32
-// CHECK: vext.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vextq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK: ret <4 x i32> [[VEXT]]
int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) {
return vextq_s32(a, b, 3);
}
-// CHECK-LABEL: test_vextq_u32
-// CHECK: vext.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vextq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK: ret <4 x i32> [[VEXT]]
uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) {
return vextq_u32(a, b, 3);
}
-// CHECK-LABEL: test_vextq_s64
-// CHECK: {{vmov|vdup}}
+// CHECK-LABEL: define <2 x i64> @test_vextq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
+// CHECK: ret <2 x i64> [[VEXT]]
int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) {
return vextq_s64(a, b, 1);
}
-// CHECK-LABEL: test_vextq_u64
-// CHECK: {{vmov|vdup}}
+// CHECK-LABEL: define <2 x i64> @test_vextq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
+// CHECK: ret <2 x i64> [[VEXT]]
uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) {
return vextq_u64(a, b, 1);
}
-// CHECK-LABEL: test_vextq_f32
-// CHECK: vext.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vextq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VEXT:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK: ret <4 x float> [[VEXT]]
float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) {
return vextq_f32(a, b, 3);
}
-// CHECK-LABEL: test_vfma_f32
-// CHECK: vfma.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vfma_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
+// CHECK: ret <2 x float> [[TMP6]]
float32x2_t test_vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
return vfma_f32(a, b, c);
}
-// CHECK-LABEL: test_vfmaq_f32
-// CHECK: vfma.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vfmaq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
+// CHECK: ret <4 x float> [[TMP6]]
float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
return vfmaq_f32(a, b, c);
}
-// CHECK-LABEL: test_vget_high_s8
+// CHECK-LABEL: define <8 x i8> @test_vget_high_s8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vget_high_s8(int8x16_t a) {
return vget_high_s8(a);
}
-// CHECK-LABEL: test_vget_high_s16
+// CHECK-LABEL: define <4 x i16> @test_vget_high_s16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vget_high_s16(int16x8_t a) {
return vget_high_s16(a);
}
-// CHECK-LABEL: test_vget_high_s32
+// CHECK-LABEL: define <2 x i32> @test_vget_high_s32(<4 x i32> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vget_high_s32(int32x4_t a) {
return vget_high_s32(a);
}
-// CHECK-LABEL: test_vget_high_s64
+// CHECK-LABEL: define <1 x i64> @test_vget_high_s64(<2 x i64> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
+// CHECK: ret <1 x i64> [[SHUFFLE_I]]
int64x1_t test_vget_high_s64(int64x2_t a) {
return vget_high_s64(a);
}
-// CHECK-LABEL: test_vget_high_f16
+// CHECK-LABEL: define <4 x half> @test_vget_high_f16(<8 x half> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <4 x half> [[SHUFFLE_I]]
float16x4_t test_vget_high_f16(float16x8_t a) {
return vget_high_f16(a);
}
-// CHECK-LABEL: test_vget_high_f32
+// CHECK-LABEL: define <2 x float> @test_vget_high_f32(<4 x float> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vget_high_f32(float32x4_t a) {
return vget_high_f32(a);
}
-// CHECK-LABEL: test_vget_high_u8
+// CHECK-LABEL: define <8 x i8> @test_vget_high_u8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vget_high_u8(uint8x16_t a) {
return vget_high_u8(a);
}
-// CHECK-LABEL: test_vget_high_u16
+// CHECK-LABEL: define <4 x i16> @test_vget_high_u16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vget_high_u16(uint16x8_t a) {
return vget_high_u16(a);
}
-// CHECK-LABEL: test_vget_high_u32
+// CHECK-LABEL: define <2 x i32> @test_vget_high_u32(<4 x i32> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vget_high_u32(uint32x4_t a) {
return vget_high_u32(a);
}
-// CHECK-LABEL: test_vget_high_u64
+// CHECK-LABEL: define <1 x i64> @test_vget_high_u64(<2 x i64> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
+// CHECK: ret <1 x i64> [[SHUFFLE_I]]
uint64x1_t test_vget_high_u64(uint64x2_t a) {
return vget_high_u64(a);
}
-// CHECK-LABEL: test_vget_high_p8
+// CHECK-LABEL: define <8 x i8> @test_vget_high_p8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vget_high_p8(poly8x16_t a) {
return vget_high_p8(a);
}
-// CHECK-LABEL: test_vget_high_p16
+// CHECK-LABEL: define <4 x i16> @test_vget_high_p16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vget_high_p16(poly16x8_t a) {
return vget_high_p16(a);
}
-// CHECK-LABEL: test_vget_lane_u8
-// CHECK: vmov
+// CHECK-LABEL: define i8 @test_vget_lane_u8(<8 x i8> %a) #0 {
+// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
+// CHECK: ret i8 [[VGET_LANE]]
uint8_t test_vget_lane_u8(uint8x8_t a) {
return vget_lane_u8(a, 7);
}
-// CHECK-LABEL: test_vget_lane_u16
-// CHECK: vmov
+// CHECK-LABEL: define i16 @test_vget_lane_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: ret i16 [[VGET_LANE]]
uint16_t test_vget_lane_u16(uint16x4_t a) {
return vget_lane_u16(a, 3);
}
-// CHECK-LABEL: test_vget_lane_u32
-// CHECK: mov
+// CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK: ret i32 [[VGET_LANE]]
uint32_t test_vget_lane_u32(uint32x2_t a) {
return vget_lane_u32(a, 1);
}
-// CHECK-LABEL: test_vget_lane_s8
-// CHECK: vmov
+// CHECK-LABEL: define i8 @test_vget_lane_s8(<8 x i8> %a) #0 {
+// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
+// CHECK: ret i8 [[VGET_LANE]]
int8_t test_vget_lane_s8(int8x8_t a) {
return vget_lane_s8(a, 7);
}
-// CHECK-LABEL: test_vget_lane_s16
-// CHECK: vmov
+// CHECK-LABEL: define i16 @test_vget_lane_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: ret i16 [[VGET_LANE]]
int16_t test_vget_lane_s16(int16x4_t a) {
return vget_lane_s16(a, 3);
}
-// CHECK-LABEL: test_vget_lane_s32
-// CHECK: mov
+// CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK: ret i32 [[VGET_LANE]]
int32_t test_vget_lane_s32(int32x2_t a) {
return vget_lane_s32(a, 1);
}
-// CHECK-LABEL: test_vget_lane_p8
-// CHECK: vmov
+// CHECK-LABEL: define i8 @test_vget_lane_p8(<8 x i8> %a) #0 {
+// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
+// CHECK: ret i8 [[VGET_LANE]]
poly8_t test_vget_lane_p8(poly8x8_t a) {
return vget_lane_p8(a, 7);
}
-// CHECK-LABEL: test_vget_lane_p16
-// CHECK: vmov
+// CHECK-LABEL: define i16 @test_vget_lane_p16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: ret i16 [[VGET_LANE]]
poly16_t test_vget_lane_p16(poly16x4_t a) {
return vget_lane_p16(a, 3);
}
-// CHECK-LABEL: test_vget_lane_f32
-// CHECK: vmov
+// CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+// CHECK: ret float [[VGET_LANE]]
float32_t test_vget_lane_f32(float32x2_t a) {
return vget_lane_f32(a, 1);
}
-// CHECK-LABEL: test_vget_lane_f16
-// CHECK: vmov
+// CHECK-LABEL: define float @test_vget_lane_f16(<4 x half> %a) #0 {
+// CHECK: [[__REINT_242:%.*]] = alloca <4 x half>, align 8
+// CHECK: [[__REINT1_242:%.*]] = alloca i16, align 2
+// CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
+// CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
+// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
+// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
+// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
+// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
+// CHECK: ret float [[CONV]]
float32_t test_vget_lane_f16(float16x4_t a) {
return vget_lane_f16(a, 1);
}
-// CHECK-LABEL: test_vgetq_lane_u8
-// CHECK: vmov
+// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #0 {
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
+// CHECK: ret i8 [[VGETQ_LANE]]
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
return vgetq_lane_u8(a, 15);
}
-// CHECK-LABEL: test_vgetq_lane_u16
-// CHECK: vmov
+// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: ret i16 [[VGETQ_LANE]]
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
return vgetq_lane_u16(a, 7);
}
-// CHECK-LABEL: test_vgetq_lane_u32
-// CHECK: vmov
+// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK: ret i32 [[VGETQ_LANE]]
uint32_t test_vgetq_lane_u32(uint32x4_t a) {
return vgetq_lane_u32(a, 3);
}
-// CHECK-LABEL: test_vgetq_lane_s8
-// CHECK: vmov
+// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #0 {
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
+// CHECK: ret i8 [[VGETQ_LANE]]
int8_t test_vgetq_lane_s8(int8x16_t a) {
return vgetq_lane_s8(a, 15);
}
-// CHECK-LABEL: test_vgetq_lane_s16
-// CHECK: vmov
+// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: ret i16 [[VGETQ_LANE]]
int16_t test_vgetq_lane_s16(int16x8_t a) {
return vgetq_lane_s16(a, 7);
}
-// CHECK-LABEL: test_vgetq_lane_s32
-// CHECK: vmov
+// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK: ret i32 [[VGETQ_LANE]]
int32_t test_vgetq_lane_s32(int32x4_t a) {
return vgetq_lane_s32(a, 3);
}
-// CHECK-LABEL: test_vgetq_lane_p8
-// CHECK: vmov
+// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #0 {
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
+// CHECK: ret i8 [[VGETQ_LANE]]
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
return vgetq_lane_p8(a, 15);
}
-// CHECK-LABEL: test_vgetq_lane_p16
-// CHECK: vmov
+// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: ret i16 [[VGETQ_LANE]]
poly16_t test_vgetq_lane_p16(poly16x8_t a) {
return vgetq_lane_p16(a, 7);
}
-// CHECK-LABEL: test_vgetq_lane_f32
-// CHECK: vmov
+// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+// CHECK: ret float [[VGETQ_LANE]]
float32_t test_vgetq_lane_f32(float32x4_t a) {
return vgetq_lane_f32(a, 3);
}
-// CHECK-LABEL: test_vgetq_lane_f16
-// CHECK: vmov
+// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #0 {
+// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16
+// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2
+// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
+// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
+// CHECK: store i16 [[VGETQ_LANE]], i16* [[__REINT1_244]], align 2
+// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
+// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
+// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
+// CHECK: ret float [[CONV]]
float32_t test_vgetq_lane_f16(float16x8_t a) {
return vgetq_lane_f16(a, 3);
}
-// CHECK-LABEL: test_vget_lane_s64
// The optimizer is able to remove all moves now.
+// CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// CHECK: ret i64 [[VGET_LANE]]
int64_t test_vget_lane_s64(int64x1_t a) {
return vget_lane_s64(a, 0);
}
-// CHECK-LABEL: test_vget_lane_u64
// The optimizer is able to remove all moves now.
+// CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// CHECK: ret i64 [[VGET_LANE]]
uint64_t test_vget_lane_u64(uint64x1_t a) {
return vget_lane_u64(a, 0);
}
-// CHECK-LABEL: test_vgetq_lane_s64
-// CHECK: vmov
+// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// CHECK: ret i64 [[VGETQ_LANE]]
int64_t test_vgetq_lane_s64(int64x2_t a) {
return vgetq_lane_s64(a, 1);
}
-// CHECK-LABEL: test_vgetq_lane_u64
-// CHECK: vmov
+// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// CHECK: ret i64 [[VGETQ_LANE]]
uint64_t test_vgetq_lane_u64(uint64x2_t a) {
return vgetq_lane_u64(a, 1);
}
-// CHECK-LABEL: test_vget_low_s8
+// CHECK-LABEL: define <8 x i8> @test_vget_low_s8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vget_low_s8(int8x16_t a) {
return vget_low_s8(a);
}
-// CHECK-LABEL: test_vget_low_s16
+// CHECK-LABEL: define <4 x i16> @test_vget_low_s16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vget_low_s16(int16x8_t a) {
return vget_low_s16(a);
}
-// CHECK-LABEL: test_vget_low_s32
+// CHECK-LABEL: define <2 x i32> @test_vget_low_s32(<4 x i32> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
+// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vget_low_s32(int32x4_t a) {
return vget_low_s32(a);
}
-// CHECK-LABEL: test_vget_low_s64
+// CHECK-LABEL: define <1 x i64> @test_vget_low_s64(<2 x i64> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
+// CHECK: ret <1 x i64> [[SHUFFLE_I]]
int64x1_t test_vget_low_s64(int64x2_t a) {
return vget_low_s64(a);
}
-// CHECK-LABEL: test_vget_low_f16
+// CHECK-LABEL: define <4 x half> @test_vget_low_f16(<8 x half> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x half> [[SHUFFLE_I]]
float16x4_t test_vget_low_f16(float16x8_t a) {
return vget_low_f16(a);
}
-// CHECK-LABEL: test_vget_low_f32
+// CHECK-LABEL: define <2 x float> @test_vget_low_f32(<4 x float> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
+// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vget_low_f32(float32x4_t a) {
return vget_low_f32(a);
}
-// CHECK-LABEL: test_vget_low_u8
+// CHECK-LABEL: define <8 x i8> @test_vget_low_u8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vget_low_u8(uint8x16_t a) {
return vget_low_u8(a);
}
-// CHECK-LABEL: test_vget_low_u16
+// CHECK-LABEL: define <4 x i16> @test_vget_low_u16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vget_low_u16(uint16x8_t a) {
return vget_low_u16(a);
}
-// CHECK-LABEL: test_vget_low_u32
+// CHECK-LABEL: define <2 x i32> @test_vget_low_u32(<4 x i32> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
+// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vget_low_u32(uint32x4_t a) {
return vget_low_u32(a);
}
-// CHECK-LABEL: test_vget_low_u64
+// CHECK-LABEL: define <1 x i64> @test_vget_low_u64(<2 x i64> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
+// CHECK: ret <1 x i64> [[SHUFFLE_I]]
uint64x1_t test_vget_low_u64(uint64x2_t a) {
return vget_low_u64(a);
}
-// CHECK-LABEL: test_vget_low_p8
+// CHECK-LABEL: define <8 x i8> @test_vget_low_p8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vget_low_p8(poly8x16_t a) {
return vget_low_p8(a);
}
-// CHECK-LABEL: test_vget_low_p16
+// CHECK-LABEL: define <4 x i16> @test_vget_low_p16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vget_low_p16(poly16x8_t a) {
return vget_low_p16(a);
}
-// CHECK-LABEL: test_vhadd_s8
-// CHECK: vhadd.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vhadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t a, int8x8_t b) {
return vhadd_s8(a, b);
}
-// CHECK-LABEL: test_vhadd_s16
-// CHECK: vhadd.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vhadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
+// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vhadd_s16(int16x4_t a, int16x4_t b) {
return vhadd_s16(a, b);
}
-// CHECK-LABEL: test_vhadd_s32
-// CHECK: vhadd.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vhadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
+// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vhadd_s32(int32x2_t a, int32x2_t b) {
return vhadd_s32(a, b);
}
-// CHECK-LABEL: test_vhadd_u8
-// CHECK: vhadd.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vhadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t a, uint8x8_t b) {
return vhadd_u8(a, b);
}
-// CHECK-LABEL: test_vhadd_u16
-// CHECK: vhadd.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vhadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
+// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vhadd_u16(uint16x4_t a, uint16x4_t b) {
return vhadd_u16(a, b);
}
-// CHECK-LABEL: test_vhadd_u32
-// CHECK: vhadd.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vhadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
+// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vhadd_u32(uint32x2_t a, uint32x2_t b) {
return vhadd_u32(a, b);
}
-// CHECK-LABEL: test_vhaddq_s8
-// CHECK: vhadd.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vhaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t a, int8x16_t b) {
return vhaddq_s8(a, b);
}
-// CHECK-LABEL: test_vhaddq_s16
-// CHECK: vhadd.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vhaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
+// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b) {
return vhaddq_s16(a, b);
}
-// CHECK-LABEL: test_vhaddq_s32
-// CHECK: vhadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vhaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
+// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vhaddq_s32(int32x4_t a, int32x4_t b) {
return vhaddq_s32(a, b);
}
-// CHECK-LABEL: test_vhaddq_u8
-// CHECK: vhadd.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vhaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b) {
return vhaddq_u8(a, b);
}
-// CHECK-LABEL: test_vhaddq_u16
-// CHECK: vhadd.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vhaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
+// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vhaddq_u16(uint16x8_t a, uint16x8_t b) {
return vhaddq_u16(a, b);
}
-// CHECK-LABEL: test_vhaddq_u32
-// CHECK: vhadd.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vhaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
+// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vhaddq_u32(uint32x4_t a, uint32x4_t b) {
return vhaddq_u32(a, b);
}
-// CHECK-LABEL: test_vhsub_s8
-// CHECK: vhsub.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vhsub_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t a, int8x8_t b) {
return vhsub_s8(a, b);
}
-// CHECK-LABEL: test_vhsub_s16
-// CHECK: vhsub.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vhsub_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
+// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vhsub_s16(int16x4_t a, int16x4_t b) {
return vhsub_s16(a, b);
}
-// CHECK-LABEL: test_vhsub_s32
-// CHECK: vhsub.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vhsub_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
+// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vhsub_s32(int32x2_t a, int32x2_t b) {
return vhsub_s32(a, b);
}
-// CHECK-LABEL: test_vhsub_u8
-// CHECK: vhsub.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vhsub_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t a, uint8x8_t b) {
return vhsub_u8(a, b);
}
-// CHECK-LABEL: test_vhsub_u16
-// CHECK: vhsub.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vhsub_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
+// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vhsub_u16(uint16x4_t a, uint16x4_t b) {
return vhsub_u16(a, b);
}
-// CHECK-LABEL: test_vhsub_u32
-// CHECK: vhsub.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vhsub_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
+// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vhsub_u32(uint32x2_t a, uint32x2_t b) {
return vhsub_u32(a, b);
}
-// CHECK-LABEL: test_vhsubq_s8
-// CHECK: vhsub.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vhsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t a, int8x16_t b) {
return vhsubq_s8(a, b);
}
-// CHECK-LABEL: test_vhsubq_s16
-// CHECK: vhsub.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vhsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
+// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vhsubq_s16(int16x8_t a, int16x8_t b) {
return vhsubq_s16(a, b);
}
-// CHECK-LABEL: test_vhsubq_s32
-// CHECK: vhsub.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vhsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
+// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vhsubq_s32(int32x4_t a, int32x4_t b) {
return vhsubq_s32(a, b);
}
-// CHECK-LABEL: test_vhsubq_u8
-// CHECK: vhsub.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vhsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t a, uint8x16_t b) {
return vhsubq_u8(a, b);
}
-// CHECK-LABEL: test_vhsubq_u16
-// CHECK: vhsub.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vhsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
+// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vhsubq_u16(uint16x8_t a, uint16x8_t b) {
return vhsubq_u16(a, b);
}
-// CHECK-LABEL: test_vhsubq_u32
-// CHECK: vhsub.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vhsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
+// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vhsubq_u32(uint32x4_t a, uint32x4_t b) {
return vhsubq_u32(a, b);
}
-// CHECK-LABEL: test_vld1q_u8
-// CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_u8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
+// CHECK: ret <16 x i8> [[TMP1]]
uint8x16_t test_vld1q_u8(uint8_t const * a) {
return vld1q_u8(a);
}
-// CHECK-LABEL: test_vld1q_u16
-// CHECK: vld1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_u16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
+// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vld1q_u16(uint16_t const * a) {
return vld1q_u16(a);
}
-// CHECK-LABEL: test_vld1q_u32
-// CHECK: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <4 x i32> @test_vld1q_u32(i32* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
+// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vld1q_u32(uint32_t const * a) {
return vld1q_u32(a);
}
-// CHECK-LABEL: test_vld1q_u64
-// CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}{{(:64)?}}]
+// CHECK-LABEL: define <2 x i64> @test_vld1q_u64(i64* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
+// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
+// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vld1q_u64(uint64_t const * a) {
return vld1q_u64(a);
}
-// CHECK-LABEL: test_vld1q_s8
-// CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_s8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
+// CHECK: ret <16 x i8> [[TMP1]]
int8x16_t test_vld1q_s8(int8_t const * a) {
return vld1q_s8(a);
}
-// CHECK-LABEL: test_vld1q_s16
-// CHECK: vld1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_s16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
+// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vld1q_s16(int16_t const * a) {
return vld1q_s16(a);
}
-// CHECK-LABEL: test_vld1q_s32
-// CHECK: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <4 x i32> @test_vld1q_s32(i32* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
+// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vld1q_s32(int32_t const * a) {
return vld1q_s32(a);
}
-// CHECK-LABEL: test_vld1q_s64
-// CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}{{(:64)?}}]
+// CHECK-LABEL: define <2 x i64> @test_vld1q_s64(i64* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
+// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vld1q_s64(int64_t const * a) {
return vld1q_s64(a);
}
-// CHECK-LABEL: test_vld1q_f16
-// CHECK: vld1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <8 x half> @test_vld1q_f16(half* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
+// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>
+// CHECK: ret <8 x half> [[TMP3]]
float16x8_t test_vld1q_f16(float16_t const * a) {
return vld1q_f16(a);
}
-// CHECK-LABEL: test_vld1q_f32
-// CHECK: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <4 x float> @test_vld1q_f32(float* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
+// CHECK: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]]
+// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vld1q_f32(float32_t const * a) {
return vld1q_f32(a);
}
-// CHECK-LABEL: test_vld1q_p8
-// CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_p8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
+// CHECK: ret <16 x i8> [[TMP1]]
poly8x16_t test_vld1q_p8(poly8_t const * a) {
return vld1q_p8(a);
}
-// CHECK-LABEL: test_vld1q_p16
-// CHECK: vld1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_p16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
+// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
+// CHECK: ret <8 x i16> [[TMP2]]
poly16x8_t test_vld1q_p16(poly16_t const * a) {
return vld1q_p16(a);
}
-// CHECK-LABEL: test_vld1_u8
-// CHECK: vld1.8 {d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <8 x i8> @test_vld1_u8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
+// CHECK: ret <8 x i8> [[TMP1]]
uint8x8_t test_vld1_u8(uint8_t const * a) {
return vld1_u8(a);
}
-// CHECK-LABEL: test_vld1_u16
-// CHECK: vld1.16 {d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <4 x i16> @test_vld1_u16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
+// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vld1_u16(uint16_t const * a) {
return vld1_u16(a);
}
-// CHECK-LABEL: test_vld1_u32
-// CHECK: vld1.32 {d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <2 x i32> @test_vld1_u32(i32* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
+// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vld1_u32(uint32_t const * a) {
return vld1_u32(a);
}
-// CHECK-LABEL: test_vld1_u64
-// CHECK: vld1.64 {d{{[0-9]+}}}, [r{{[0-9]+}}{{(:64)?}}]
+// CHECK-LABEL: define <1 x i64> @test_vld1_u64(i64* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
+// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
+// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vld1_u64(uint64_t const * a) {
return vld1_u64(a);
}
-// CHECK-LABEL: test_vld1_s8
-// CHECK: vld1.8 {d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <8 x i8> @test_vld1_s8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
+// CHECK: ret <8 x i8> [[TMP1]]
int8x8_t test_vld1_s8(int8_t const * a) {
return vld1_s8(a);
}
-// CHECK-LABEL: test_vld1_s16
-// CHECK: vld1.16 {d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <4 x i16> @test_vld1_s16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
+// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vld1_s16(int16_t const * a) {
return vld1_s16(a);
}
-// CHECK-LABEL: test_vld1_s32
-// CHECK: vld1.32 {d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <2 x i32> @test_vld1_s32(i32* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
+// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vld1_s32(int32_t const * a) {
return vld1_s32(a);
}
-// CHECK-LABEL: test_vld1_s64
-// CHECK: vld1.64 {d{{[0-9]+}}}, [r{{[0-9]+}}{{(:64)?}}]
+// CHECK-LABEL: define <1 x i64> @test_vld1_s64(i64* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
+// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
+// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vld1_s64(int64_t const * a) {
return vld1_s64(a);
}
-// CHECK-LABEL: test_vld1_f16
-// CHECK: vld1.16 {d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <4 x half> @test_vld1_f16(half* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
+// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
+// CHECK: ret <4 x half> [[TMP3]]
float16x4_t test_vld1_f16(float16_t const * a) {
return vld1_f16(a);
}
-// CHECK-LABEL: test_vld1_f32
-// CHECK: vld1.32 {d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <2 x float> @test_vld1_f32(float* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
+// CHECK: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]]
+// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vld1_f32(float32_t const * a) {
return vld1_f32(a);
}
-// CHECK-LABEL: test_vld1_p8
-// CHECK: vld1.8 {d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <8 x i8> @test_vld1_p8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
+// CHECK: ret <8 x i8> [[TMP1]]
poly8x8_t test_vld1_p8(poly8_t const * a) {
return vld1_p8(a);
}
-// CHECK-LABEL: test_vld1_p16
-// CHECK: vld1.16 {d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <4 x i16> @test_vld1_p16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
+// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
+// CHECK: ret <4 x i16> [[TMP2]]
poly16x4_t test_vld1_p16(poly16_t const * a) {
return vld1_p16(a);
}
-// CHECK-LABEL: test_vld1q_dup_u8
-// CHECK: vld1.8 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_u8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = load i8, i8* %a
+// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
+// CHECK: ret <16 x i8> [[LANE]]
uint8x16_t test_vld1q_dup_u8(uint8_t const * a) {
return vld1q_dup_u8(a);
}
-// CHECK-LABEL: test_vld1q_dup_u16
-// CHECK: vld1.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_u16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
+// CHECK: ret <8 x i16> [[LANE]]
uint16x8_t test_vld1q_dup_u16(uint16_t const * a) {
return vld1q_dup_u16(a);
}
-// CHECK-LABEL: test_vld1q_dup_u32
-// CHECK: vld1.32 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}:32]
+// CHECK-LABEL: define <4 x i32> @test_vld1q_dup_u32(i32* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
+// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
+// CHECK: ret <4 x i32> [[LANE]]
uint32x4_t test_vld1q_dup_u32(uint32_t const * a) {
return vld1q_dup_u32(a);
}
-// CHECK-LABEL: test_vld1q_dup_u64
-// CHECK: {{ldr|vldr|vmov}}
+// CHECK-LABEL: define <2 x i64> @test_vld1q_dup_u64(i64* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
+// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
+// CHECK: ret <2 x i64> [[LANE]]
uint64x2_t test_vld1q_dup_u64(uint64_t const * a) {
return vld1q_dup_u64(a);
}
-// CHECK-LABEL: test_vld1q_dup_s8
-// CHECK: vld1.8 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_s8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = load i8, i8* %a
+// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
+// CHECK: ret <16 x i8> [[LANE]]
int8x16_t test_vld1q_dup_s8(int8_t const * a) {
return vld1q_dup_s8(a);
}
-// CHECK-LABEL: test_vld1q_dup_s16
-// CHECK: vld1.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_s16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
+// CHECK: ret <8 x i16> [[LANE]]
int16x8_t test_vld1q_dup_s16(int16_t const * a) {
return vld1q_dup_s16(a);
}
-// CHECK-LABEL: test_vld1q_dup_s32
-// CHECK: vld1.32 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}:32]
+// CHECK-LABEL: define <4 x i32> @test_vld1q_dup_s32(i32* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
+// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
+// CHECK: ret <4 x i32> [[LANE]]
int32x4_t test_vld1q_dup_s32(int32_t const * a) {
return vld1q_dup_s32(a);
}
-// CHECK-LABEL: test_vld1q_dup_s64
-// CHECK: {{ldr|vldr|vmov}}
+// CHECK-LABEL: define <2 x i64> @test_vld1q_dup_s64(i64* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
+// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
+// CHECK: ret <2 x i64> [[LANE]]
int64x2_t test_vld1q_dup_s64(int64_t const * a) {
return vld1q_dup_s64(a);
}
-// CHECK-LABEL: test_vld1q_dup_f16
-// CHECK: vld1.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <8 x half> @test_vld1q_dup_f16(half* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[LANE]] to <8 x half>
+// CHECK: ret <8 x half> [[TMP4]]
float16x8_t test_vld1q_dup_f16(float16_t const * a) {
return vld1q_dup_f16(a);
}
-// CHECK-LABEL: test_vld1q_dup_f32
-// CHECK: vld1.32 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}:32]
+// CHECK-LABEL: define <4 x float> @test_vld1q_dup_f32(float* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
+// CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <4 x i32> zeroinitializer
+// CHECK: ret <4 x float> [[LANE]]
float32x4_t test_vld1q_dup_f32(float32_t const * a) {
return vld1q_dup_f32(a);
}
-// CHECK-LABEL: test_vld1q_dup_p8
-// CHECK: vld1.8 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_p8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = load i8, i8* %a
+// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
+// CHECK: ret <16 x i8> [[LANE]]
poly8x16_t test_vld1q_dup_p8(poly8_t const * a) {
return vld1q_dup_p8(a);
}
-// CHECK-LABEL: test_vld1q_dup_p16
-// CHECK: vld1.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_p16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
+// CHECK: ret <8 x i16> [[LANE]]
poly16x8_t test_vld1q_dup_p16(poly16_t const * a) {
return vld1q_dup_p16(a);
}
-// CHECK-LABEL: test_vld1_dup_u8
-// CHECK: vld1.8 {d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_u8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = load i8, i8* %a
+// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK: ret <8 x i8> [[LANE]]
uint8x8_t test_vld1_dup_u8(uint8_t const * a) {
return vld1_dup_u8(a);
}
-// CHECK-LABEL: test_vld1_dup_u16
-// CHECK: vld1.16 {d{{[0-9]+}}[]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_u16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
+// CHECK: ret <4 x i16> [[LANE]]
uint16x4_t test_vld1_dup_u16(uint16_t const * a) {
return vld1_dup_u16(a);
}
-// CHECK-LABEL: test_vld1_dup_u32
-// CHECK: vld1.32 {d{{[0-9]+}}[]}, [r{{[0-9]+}}:32]
+// CHECK-LABEL: define <2 x i32> @test_vld1_dup_u32(i32* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
+// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
+// CHECK: ret <2 x i32> [[LANE]]
uint32x2_t test_vld1_dup_u32(uint32_t const * a) {
return vld1_dup_u32(a);
}
-// CHECK-LABEL: test_vld1_dup_u64
-// CHECK: {{ldr|vldr|vmov}}
+// CHECK-LABEL: define <1 x i64> @test_vld1_dup_u64(i64* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
+// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
+// CHECK: ret <1 x i64> [[LANE]]
uint64x1_t test_vld1_dup_u64(uint64_t const * a) {
return vld1_dup_u64(a);
}
-// CHECK-LABEL: test_vld1_dup_s8
-// CHECK: vld1.8 {d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_s8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = load i8, i8* %a
+// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK: ret <8 x i8> [[LANE]]
int8x8_t test_vld1_dup_s8(int8_t const * a) {
return vld1_dup_s8(a);
}
-// CHECK-LABEL: test_vld1_dup_s16
-// CHECK: vld1.16 {d{{[0-9]+}}[]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_s16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
+// CHECK: ret <4 x i16> [[LANE]]
int16x4_t test_vld1_dup_s16(int16_t const * a) {
return vld1_dup_s16(a);
}
-// CHECK-LABEL: test_vld1_dup_s32
-// CHECK: vld1.32 {d{{[0-9]+}}[]}, [r{{[0-9]+}}:32]
+// CHECK-LABEL: define <2 x i32> @test_vld1_dup_s32(i32* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
+// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
+// CHECK: ret <2 x i32> [[LANE]]
int32x2_t test_vld1_dup_s32(int32_t const * a) {
return vld1_dup_s32(a);
}
-// CHECK-LABEL: test_vld1_dup_s64
-// CHECK: {{ldr|vldr|vmov}}
+// CHECK-LABEL: define <1 x i64> @test_vld1_dup_s64(i64* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
+// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
+// CHECK: ret <1 x i64> [[LANE]]
int64x1_t test_vld1_dup_s64(int64_t const * a) {
return vld1_dup_s64(a);
}
-// CHECK-LABEL: test_vld1_dup_f16
-// CHECK: vld1.16 {d{{[0-9]+}}[]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(half* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <4 x half>
+// CHECK: ret <4 x half> [[TMP4]]
float16x4_t test_vld1_dup_f16(float16_t const * a) {
return vld1_dup_f16(a);
}
-// CHECK-LABEL: test_vld1_dup_f32
-// CHECK: vld1.32 {d{{[0-9]+}}[]}, [r{{[0-9]+}}:32]
+// CHECK-LABEL: define <2 x float> @test_vld1_dup_f32(float* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
+// CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
+// CHECK: ret <2 x float> [[LANE]]
float32x2_t test_vld1_dup_f32(float32_t const * a) {
return vld1_dup_f32(a);
}
-// CHECK-LABEL: test_vld1_dup_p8
-// CHECK: vld1.8 {d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_p8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = load i8, i8* %a
+// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK: ret <8 x i8> [[LANE]]
poly8x8_t test_vld1_dup_p8(poly8_t const * a) {
return vld1_dup_p8(a);
}
-// CHECK-LABEL: test_vld1_dup_p16
-// CHECK: vld1.16 {d{{[0-9]+}}[]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_p16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
+// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
+// CHECK: ret <4 x i16> [[LANE]]
poly16x4_t test_vld1_dup_p16(poly16_t const * a) {
return vld1_dup_p16(a);
}
-// CHECK-LABEL: test_vld1q_lane_u8
-// CHECK: vld1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_u8(i8* %a, <16 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = load i8, i8* %a
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
+// CHECK: ret <16 x i8> [[VLD1_LANE]]
uint8x16_t test_vld1q_lane_u8(uint8_t const * a, uint8x16_t b) {
return vld1q_lane_u8(a, b, 15);
}
-// CHECK-LABEL: test_vld1q_lane_u16
-// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_u16(i16* %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
+// CHECK: ret <8 x i16> [[VLD1_LANE]]
uint16x8_t test_vld1q_lane_u16(uint16_t const * a, uint16x8_t b) {
return vld1q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: test_vld1q_lane_u32
-// CHECK: vld1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32]
+// CHECK-LABEL: define <4 x i32> @test_vld1q_lane_u32(i32* %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
+// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
+// CHECK: ret <4 x i32> [[VLD1_LANE]]
uint32x4_t test_vld1q_lane_u32(uint32_t const * a, uint32x4_t b) {
return vld1q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: test_vld1q_lane_u64
-// CHECK: {{ldr|vldr|vmov}}
+// CHECK-LABEL: define <2 x i64> @test_vld1q_lane_u64(i64* %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
+// CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP4]], i32 1
+// CHECK: ret <2 x i64> [[VLD1_LANE]]
uint64x2_t test_vld1q_lane_u64(uint64_t const * a, uint64x2_t b) {
return vld1q_lane_u64(a, b, 1);
}
-// CHECK-LABEL: test_vld1q_lane_s8
-// CHECK: vld1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = load i8, i8* %a
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
+// CHECK: ret <16 x i8> [[VLD1_LANE]]
int8x16_t test_vld1q_lane_s8(int8_t const * a, int8x16_t b) {
return vld1q_lane_s8(a, b, 15);
}
-// CHECK-LABEL: test_vld1q_lane_s16
-// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
+// CHECK: ret <8 x i16> [[VLD1_LANE]]
int16x8_t test_vld1q_lane_s16(int16_t const * a, int16x8_t b) {
return vld1q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: test_vld1q_lane_s32
-// CHECK: vld1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32]
+// CHECK-LABEL: define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
+// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
+// CHECK: ret <4 x i32> [[VLD1_LANE]]
int32x4_t test_vld1q_lane_s32(int32_t const * a, int32x4_t b) {
return vld1q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: test_vld1q_lane_s64
-// CHECK: {{ldr|vldr|vmov}}
+// CHECK-LABEL: define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
+// CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP4]], i32 1
+// CHECK: ret <2 x i64> [[VLD1_LANE]]
int64x2_t test_vld1q_lane_s64(int64_t const * a, int64x2_t b) {
return vld1q_lane_s64(a, b, 1);
}
-// CHECK-LABEL: test_vld1q_lane_f16
-// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <8 x half> @test_vld1q_lane_f16(half* %a, <8 x half> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[VLD1_LANE]] to <8 x half>
+// CHECK: ret <8 x half> [[TMP5]]
float16x8_t test_vld1q_lane_f16(float16_t const * a, float16x8_t b) {
return vld1q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: test_vld1q_lane_f32
-// CHECK: vld1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32]
+// CHECK-LABEL: define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
+// CHECK: [[TMP4:%.*]] = load float, float* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i32 3
+// CHECK: ret <4 x float> [[VLD1_LANE]]
float32x4_t test_vld1q_lane_f32(float32_t const * a, float32x4_t b) {
return vld1q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: test_vld1q_lane_p8
-// CHECK: vld1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_p8(i8* %a, <16 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = load i8, i8* %a
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
+// CHECK: ret <16 x i8> [[VLD1_LANE]]
poly8x16_t test_vld1q_lane_p8(poly8_t const * a, poly8x16_t b) {
return vld1q_lane_p8(a, b, 15);
}
-// CHECK-LABEL: test_vld1q_lane_p16
-// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_p16(i16* %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
+// CHECK: ret <8 x i16> [[VLD1_LANE]]
poly16x8_t test_vld1q_lane_p16(poly16_t const * a, poly16x8_t b) {
return vld1q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: test_vld1_lane_u8
-// CHECK: vld1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_u8(i8* %a, <8 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = load i8, i8* %a
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
+// CHECK: ret <8 x i8> [[VLD1_LANE]]
uint8x8_t test_vld1_lane_u8(uint8_t const * a, uint8x8_t b) {
return vld1_lane_u8(a, b, 7);
}
-// CHECK-LABEL: test_vld1_lane_u16
-// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_u16(i16* %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
+// CHECK: ret <4 x i16> [[VLD1_LANE]]
uint16x4_t test_vld1_lane_u16(uint16_t const * a, uint16x4_t b) {
return vld1_lane_u16(a, b, 3);
}
-// CHECK-LABEL: test_vld1_lane_u32
-// CHECK: vld1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32]
+// CHECK-LABEL: define <2 x i32> @test_vld1_lane_u32(i32* %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
+// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
+// CHECK: ret <2 x i32> [[VLD1_LANE]]
uint32x2_t test_vld1_lane_u32(uint32_t const * a, uint32x2_t b) {
return vld1_lane_u32(a, b, 1);
}
-// CHECK-LABEL: test_vld1_lane_u64
-// CHECK: {{ldr|vldr|vmov}}
+// CHECK-LABEL: define <1 x i64> @test_vld1_lane_u64(i64* %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
+// CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
+// CHECK: ret <1 x i64> [[VLD1_LANE]]
uint64x1_t test_vld1_lane_u64(uint64_t const * a, uint64x1_t b) {
return vld1_lane_u64(a, b, 0);
}
-// CHECK-LABEL: test_vld1_lane_s8
-// CHECK: vld1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = load i8, i8* %a
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
+// CHECK: ret <8 x i8> [[VLD1_LANE]]
int8x8_t test_vld1_lane_s8(int8_t const * a, int8x8_t b) {
return vld1_lane_s8(a, b, 7);
}
-// CHECK-LABEL: test_vld1_lane_s16
-// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
+// CHECK: ret <4 x i16> [[VLD1_LANE]]
int16x4_t test_vld1_lane_s16(int16_t const * a, int16x4_t b) {
return vld1_lane_s16(a, b, 3);
}
-// CHECK-LABEL: test_vld1_lane_s32
-// CHECK: vld1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32]
+// CHECK-LABEL: define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
+// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
+// CHECK: ret <2 x i32> [[VLD1_LANE]]
int32x2_t test_vld1_lane_s32(int32_t const * a, int32x2_t b) {
return vld1_lane_s32(a, b, 1);
}
-// CHECK-LABEL: test_vld1_lane_s64
-// CHECK: {{ldr|vldr|vmov}}
+// CHECK-LABEL: define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
+// CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
+// CHECK: ret <1 x i64> [[VLD1_LANE]]
int64x1_t test_vld1_lane_s64(int64_t const * a, int64x1_t b) {
return vld1_lane_s64(a, b, 0);
}
-// CHECK-LABEL: test_vld1_lane_f16
-// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(half* %a, <4 x half> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[VLD1_LANE]] to <4 x half>
+// CHECK: ret <4 x half> [[TMP5]]
float16x4_t test_vld1_lane_f16(float16_t const * a, float16x4_t b) {
return vld1_lane_f16(a, b, 3);
}
-// CHECK-LABEL: test_vld1_lane_f32
-// CHECK: vld1.32 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:32]
+// CHECK-LABEL: define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
+// CHECK: [[TMP4:%.*]] = load float, float* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP4]], i32 1
+// CHECK: ret <2 x float> [[VLD1_LANE]]
float32x2_t test_vld1_lane_f32(float32_t const * a, float32x2_t b) {
return vld1_lane_f32(a, b, 1);
}
-// CHECK-LABEL: test_vld1_lane_p8
-// CHECK: vld1.8 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_p8(i8* %a, <8 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = load i8, i8* %a
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
+// CHECK: ret <8 x i8> [[VLD1_LANE]]
poly8x8_t test_vld1_lane_p8(poly8_t const * a, poly8x8_t b) {
return vld1_lane_p8(a, b, 7);
}
-// CHECK-LABEL: test_vld1_lane_p16
-// CHECK: vld1.16 {d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}:16]
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_p16(i16* %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
+// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]]
+// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
+// CHECK: ret <4 x i16> [[VLD1_LANE]]
poly16x4_t test_vld1_lane_p16(poly16_t const * a, poly16x4_t b) {
return vld1_lane_p16(a, b, 3);
}
-// CHECK-LABEL: test_vld2q_u8
-// CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint8x16x2_t [[TMP5]]
uint8x16x2_t test_vld2q_u8(uint8_t const * a) {
return vld2q_u8(a);
}
-// CHECK-LABEL: test_vld2q_u16
-// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint16x8x2_t [[TMP6]]
uint16x8x2_t test_vld2q_u16(uint16_t const * a) {
return vld2q_u16(a);
}
-// CHECK-LABEL: test_vld2q_u32
-// CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
+// CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint32x4x2_t [[TMP6]]
uint32x4x2_t test_vld2q_u32(uint32_t const * a) {
return vld2q_u32(a);
}
-// CHECK-LABEL: test_vld2q_s8
-// CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int8x16x2_t [[TMP5]]
int8x16x2_t test_vld2q_s8(int8_t const * a) {
return vld2q_s8(a);
}
-// CHECK-LABEL: test_vld2q_s16
-// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int16x8x2_t [[TMP6]]
int16x8x2_t test_vld2q_s16(int16_t const * a) {
return vld2q_s16(a);
}
-// CHECK-LABEL: test_vld2q_s32
-// CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
+// CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int32x4x2_t [[TMP6]]
int32x4x2_t test_vld2q_s32(int32_t const * a) {
return vld2q_s32(a);
}
-// CHECK-LABEL: test_vld2q_f16
-// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float16x8x2_t [[TMP6]]
float16x8x2_t test_vld2q_f16(float16_t const * a) {
return vld2q_f16(a);
}
-// CHECK-LABEL: test_vld2q_f32
-// CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
+// CHECK: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
+// CHECK: store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float32x4x2_t [[TMP6]]
float32x4x2_t test_vld2q_f32(float32_t const * a) {
return vld2q_f32(a);
}
-// CHECK-LABEL: test_vld2q_p8
-// CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly8x16x2_t [[TMP5]]
poly8x16x2_t test_vld2q_p8(poly8_t const * a) {
return vld2q_p8(a);
}
-// CHECK-LABEL: test_vld2q_p16
-// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly16x8x2_t [[TMP6]]
poly16x8x2_t test_vld2q_p16(poly16_t const * a) {
return vld2q_p16(a);
}
-// CHECK-LABEL: test_vld2_u8
-// CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint8x8x2_t [[TMP5]]
uint8x8x2_t test_vld2_u8(uint8_t const * a) {
return vld2_u8(a);
}
-// CHECK-LABEL: test_vld2_u16
-// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x2_t [[TMP6]]
uint16x4x2_t test_vld2_u16(uint16_t const * a) {
return vld2_u16(a);
}
-// CHECK-LABEL: test_vld2_u32
-// CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
+// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x2_t [[TMP6]]
uint32x2x2_t test_vld2_u32(uint32_t const * a) {
return vld2_u32(a);
}
-// CHECK-LABEL: test_vld2_u64
-// CHECK: vld1.64
+// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_u64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
+// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint64x1x2_t [[TMP6]]
uint64x1x2_t test_vld2_u64(uint64_t const * a) {
return vld2_u64(a);
}
-// CHECK-LABEL: test_vld2_s8
-// CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int8x8x2_t [[TMP5]]
int8x8x2_t test_vld2_s8(int8_t const * a) {
return vld2_s8(a);
}
-// CHECK-LABEL: test_vld2_s16
-// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x2_t [[TMP6]]
int16x4x2_t test_vld2_s16(int16_t const * a) {
return vld2_s16(a);
}
-// CHECK-LABEL: test_vld2_s32
-// CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
+// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x2_t [[TMP6]]
int32x2x2_t test_vld2_s32(int32_t const * a) {
return vld2_s32(a);
}
-// CHECK-LABEL: test_vld2_s64
-// CHECK: vld1.64
+// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_s64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
+// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int64x1x2_t [[TMP6]]
int64x1x2_t test_vld2_s64(int64_t const * a) {
return vld2_s64(a);
}
-// CHECK-LABEL: test_vld2_f16
-// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float16x4x2_t [[TMP6]]
float16x4x2_t test_vld2_f16(float16_t const * a) {
return vld2_f16(a);
}
-// CHECK-LABEL: test_vld2_f32
-// CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
+// CHECK: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0v2f32(<2 x float>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
+// CHECK: store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x2_t [[TMP6]]
float32x2x2_t test_vld2_f32(float32_t const * a) {
return vld2_f32(a);
}
-// CHECK-LABEL: test_vld2_p8
-// CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly8x8x2_t [[TMP5]]
poly8x8x2_t test_vld2_p8(poly8_t const * a) {
return vld2_p8(a);
}
-// CHECK-LABEL: test_vld2_p16
-// CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x2_t [[TMP6]]
poly16x4x2_t test_vld2_p16(poly16_t const * a) {
return vld2_p16(a);
}
-// CHECK-LABEL: test_vld2_dup_u8
-// CHECK: vld2.8 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_dup_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
+// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint8x8x2_t [[TMP4]]
uint8x8x2_t test_vld2_dup_u8(uint8_t const * a) {
return vld2_dup_u8(a);
}
-// CHECK-LABEL: test_vld2_dup_u16
-// CHECK: vld2.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_dup_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x2_t [[TMP6]]
uint16x4x2_t test_vld2_dup_u16(uint16_t const * a) {
return vld2_dup_u16(a);
}
-// CHECK-LABEL: test_vld2_dup_u32
-// CHECK: vld2.32 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_dup_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x2_t [[TMP6]]
uint32x2x2_t test_vld2_dup_u32(uint32_t const * a) {
return vld2_dup_u32(a);
}
-// CHECK-LABEL: test_vld2_dup_u64
-// CHECK: vld1.64
+// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_dup_u64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint64x1x2_t [[TMP6]]
uint64x1x2_t test_vld2_dup_u64(uint64_t const * a) {
return vld2_dup_u64(a);
}
-// CHECK-LABEL: test_vld2_dup_s8
-// CHECK: vld2.8 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_dup_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
+// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int8x8x2_t [[TMP4]]
int8x8x2_t test_vld2_dup_s8(int8_t const * a) {
return vld2_dup_s8(a);
}
-// CHECK-LABEL: test_vld2_dup_s16
-// CHECK: vld2.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_dup_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x2_t [[TMP6]]
int16x4x2_t test_vld2_dup_s16(int16_t const * a) {
return vld2_dup_s16(a);
}
-// CHECK-LABEL: test_vld2_dup_s32
-// CHECK: vld2.32 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_dup_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x2_t [[TMP6]]
int32x2x2_t test_vld2_dup_s32(int32_t const * a) {
return vld2_dup_s32(a);
}
-// CHECK-LABEL: test_vld2_dup_s64
-// CHECK: vld1.64
+// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_dup_s64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int64x1x2_t [[TMP6]]
int64x1x2_t test_vld2_dup_s64(int64_t const * a) {
return vld2_dup_s64(a);
}
-// CHECK-LABEL: test_vld2_dup_f16
-// CHECK: vld2.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_dup_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float16x4x2_t [[TMP6]]
float16x4x2_t test_vld2_dup_f16(float16_t const * a) {
return vld2_dup_f16(a);
}
-// CHECK-LABEL: test_vld2_dup_f32
-// CHECK: vld2.32 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_dup_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
+// CHECK: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
+// CHECK: store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x2_t [[TMP6]]
float32x2x2_t test_vld2_dup_f32(float32_t const * a) {
return vld2_dup_f32(a);
}
-// CHECK-LABEL: test_vld2_dup_p8
-// CHECK: vld2.8 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_dup_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
+// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly8x8x2_t [[TMP4]]
poly8x8x2_t test_vld2_dup_p8(poly8_t const * a) {
return vld2_dup_p8(a);
}
-// CHECK-LABEL: test_vld2_dup_p16
-// CHECK: vld2.16 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_dup_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x2_t [[TMP6]]
poly16x4x2_t test_vld2_dup_p16(poly16_t const * a) {
return vld2_dup_p16(a);
}
-// CHECK-LABEL: test_vld2q_lane_u16
-// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
+// CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i8(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, i8* [[TMP3]])
+// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP1]]0
+// CHECK: [[TMP11:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false)
+// CHECK: [[TMP13:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint16x8x2_t [[TMP1]]3
uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) {
return vld2q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: test_vld2q_lane_u32
-// CHECK: vld2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
+// CHECK: [[VLD2_LANE:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i8(<4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i64 3, i8* [[TMP3]])
+// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], { <4 x i32>, <4 x i32> }* [[TMP1]]0
+// CHECK: [[TMP11:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false)
+// CHECK: [[TMP13:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint32x4x2_t [[TMP1]]3
uint32x4x2_t test_vld2q_lane_u32(uint32_t const * a, uint32x4x2_t b) {
return vld2q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: test_vld2q_lane_s16
-// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
+// CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i8(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, i8* [[TMP3]])
+// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP1]]0
+// CHECK: [[TMP11:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP12:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false)
+// CHECK: [[TMP13:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int16x8x2_t [[TMP1]]3
int16x8x2_t test_vld2q_lane_s16(int16_t const * a, int16x8x2_t b) {
return vld2q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: test_vld2q_lane_s32
-// CHECK: vld2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
+// CHECK: [[VLD2_LANE:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i8(<4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i64 3, i8* [[TMP3]])
+// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], { <4 x i32>, <4 x i32> }* [[TMP1]]0
+// CHECK: [[TMP11:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP12:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false)
+// CHECK: [[TMP13:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int32x4x2_t [[TMP1]]3
int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) {
return vld2q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: test_vld2q_lane_f16
-// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
+// CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i8(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, i8* [[TMP3]])
+// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP1]]0
+// CHECK: [[TMP11:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP12:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false)
+// CHECK: [[TMP13:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float16x8x2_t [[TMP1]]3
float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) {
return vld2q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: test_vld2q_lane_f32
-// CHECK: vld2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK: [[VLD2_LANE:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0i8(<4 x float> [[TMP8]], <4 x float> [[TMP9]], i64 3, i8* [[TMP3]])
+// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x float>, <4 x float> }*
+// CHECK: store { <4 x float>, <4 x float> } [[VLD2_LANE]], { <4 x float>, <4 x float> }* [[TMP1]]0
+// CHECK: [[TMP11:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP12:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false)
+// CHECK: [[TMP13:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float32x4x2_t [[TMP1]]3
float32x4x2_t test_vld2q_lane_f32(float32_t const * a, float32x4x2_t b) {
return vld2q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: test_vld2q_lane_p16
-// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
+// CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i8(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, i8* [[TMP3]])
+// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP1]]0
+// CHECK: [[TMP11:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false)
+// CHECK: [[TMP13:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly16x8x2_t [[TMP1]]3
poly16x8x2_t test_vld2q_lane_p16(poly16_t const * a, poly16x8x2_t b) {
return vld2q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: test_vld2_lane_u8
-// CHECK: vld2.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a)
+// CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]]
+// CHECK: [[TMP6:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint8x8x2_t [[TMP8]]
uint8x8x2_t test_vld2_lane_u8(uint8_t const * a, uint8x8x2_t b) {
return vld2_lane_u8(a, b, 7);
}
-// CHECK-LABEL: test_vld2_lane_u16
-// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i8(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, i8* [[TMP3]])
+// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP1]]0
+// CHECK: [[TMP11:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false)
+// CHECK: [[TMP13:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x2_t [[TMP1]]3
uint16x4x2_t test_vld2_lane_u16(uint16_t const * a, uint16x4x2_t b) {
return vld2_lane_u16(a, b, 3);
}
-// CHECK-LABEL: test_vld2_lane_u32
-// CHECK: vld2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
+// CHECK: [[VLD2_LANE:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i8(<2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i64 1, i8* [[TMP3]])
+// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], { <2 x i32>, <2 x i32> }* [[TMP1]]0
+// CHECK: [[TMP11:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false)
+// CHECK: [[TMP13:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x2_t [[TMP1]]3
uint32x2x2_t test_vld2_lane_u32(uint32_t const * a, uint32x2x2_t b) {
return vld2_lane_u32(a, b, 1);
}
-// CHECK-LABEL: test_vld2_lane_s8
-// CHECK: vld2.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a)
+// CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]]
+// CHECK: [[TMP6:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int8x8x2_t [[TMP8]]
int8x8x2_t test_vld2_lane_s8(int8_t const * a, int8x8x2_t b) {
return vld2_lane_s8(a, b, 7);
}
-// CHECK-LABEL: test_vld2_lane_s16
-// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i8(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, i8* [[TMP3]])
+// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP1]]0
+// CHECK: [[TMP11:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP12:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false)
+// CHECK: [[TMP13:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x2_t [[TMP1]]3
int16x4x2_t test_vld2_lane_s16(int16_t const * a, int16x4x2_t b) {
return vld2_lane_s16(a, b, 3);
}
-// CHECK-LABEL: test_vld2_lane_s32
-// CHECK: vld2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
+// CHECK: [[VLD2_LANE:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i8(<2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i64 1, i8* [[TMP3]])
+// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], { <2 x i32>, <2 x i32> }* [[TMP1]]0
+// CHECK: [[TMP11:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP12:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false)
+// CHECK: [[TMP13:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x2_t [[TMP1]]3
int32x2x2_t test_vld2_lane_s32(int32_t const * a, int32x2x2_t b) {
return vld2_lane_s32(a, b, 1);
}
-// CHECK-LABEL: test_vld2_lane_f16
-// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i8(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, i8* [[TMP3]])
+// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP1]]0
+// CHECK: [[TMP11:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP12:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false)
+// CHECK: [[TMP13:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float16x4x2_t [[TMP1]]3
float16x4x2_t test_vld2_lane_f16(float16_t const * a, float16x4x2_t b) {
return vld2_lane_f16(a, b, 3);
}
-// CHECK-LABEL: test_vld2_lane_f32
-// CHECK: vld2.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK: [[VLD2_LANE:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0i8(<2 x float> [[TMP8]], <2 x float> [[TMP9]], i64 1, i8* [[TMP3]])
+// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x float>, <2 x float> }*
+// CHECK: store { <2 x float>, <2 x float> } [[VLD2_LANE]], { <2 x float>, <2 x float> }* [[TMP1]]0
+// CHECK: [[TMP11:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP12:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false)
+// CHECK: [[TMP13:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x2_t [[TMP1]]3
float32x2x2_t test_vld2_lane_f32(float32_t const * a, float32x2x2_t b) {
return vld2_lane_f32(a, b, 1);
}
-// CHECK-LABEL: test_vld2_lane_p8
-// CHECK: vld2.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a)
+// CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]]
+// CHECK: [[TMP6:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly8x8x2_t [[TMP8]]
poly8x8x2_t test_vld2_lane_p8(poly8_t const * a, poly8x8x2_t b) {
return vld2_lane_p8(a, b, 7);
}
-// CHECK-LABEL: test_vld2_lane_p16
-// CHECK: vld2.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i8(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, i8* [[TMP3]])
+// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP1]]0
+// CHECK: [[TMP11:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false)
+// CHECK: [[TMP13:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x2_t [[TMP1]]3
poly16x4x2_t test_vld2_lane_p16(poly16_t const * a, poly16x4x2_t b) {
return vld2_lane_p16(a, b, 3);
}
-// CHECK-LABEL: test_vld3q_u8
-// CHECK: vld3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint8x16x3_t [[TMP5]]
uint8x16x3_t test_vld3q_u8(uint8_t const * a) {
return vld3q_u8(a);
}
-// CHECK-LABEL: test_vld3q_u16
-// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint16x8x3_t [[TMP6]]
uint16x8x3_t test_vld3q_u16(uint16_t const * a) {
return vld3q_u16(a);
}
-// CHECK-LABEL: test_vld3q_u32
-// CHECK: vld3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
+// CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint32x4x3_t [[TMP6]]
uint32x4x3_t test_vld3q_u32(uint32_t const * a) {
return vld3q_u32(a);
}
-// CHECK-LABEL: test_vld3q_s8
-// CHECK: vld3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int8x16x3_t [[TMP5]]
int8x16x3_t test_vld3q_s8(int8_t const * a) {
return vld3q_s8(a);
}
-// CHECK-LABEL: test_vld3q_s16
-// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int16x8x3_t [[TMP6]]
int16x8x3_t test_vld3q_s16(int16_t const * a) {
return vld3q_s16(a);
}
-// CHECK-LABEL: test_vld3q_s32
-// CHECK: vld3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
+// CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int32x4x3_t [[TMP6]]
int32x4x3_t test_vld3q_s32(int32_t const * a) {
return vld3q_s32(a);
}
-// CHECK-LABEL: test_vld3q_f16
-// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float16x8x3_t [[TMP6]]
float16x8x3_t test_vld3q_f16(float16_t const * a) {
return vld3q_f16(a);
}
-// CHECK-LABEL: test_vld3q_f32
-// CHECK: vld3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
+// CHECK: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0v4f32(<4 x float>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
+// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float32x4x3_t [[TMP6]]
float32x4x3_t test_vld3q_f32(float32_t const * a) {
return vld3q_f32(a);
}
-// CHECK-LABEL: test_vld3q_p8
-// CHECK: vld3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly8x16x3_t [[TMP5]]
poly8x16x3_t test_vld3q_p8(poly8_t const * a) {
return vld3q_p8(a);
}
-// CHECK-LABEL: test_vld3q_p16
-// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly16x8x3_t [[TMP6]]
poly16x8x3_t test_vld3q_p16(poly16_t const * a) {
return vld3q_p16(a);
}
-// CHECK-LABEL: test_vld3_u8
-// CHECK: vld3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint8x8x3_t [[TMP5]]
uint8x8x3_t test_vld3_u8(uint8_t const * a) {
return vld3_u8(a);
}
-// CHECK-LABEL: test_vld3_u16
-// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x3_t [[TMP6]]
uint16x4x3_t test_vld3_u16(uint16_t const * a) {
return vld3_u16(a);
}
-// CHECK-LABEL: test_vld3_u32
-// CHECK: vld3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
+// CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x3_t [[TMP6]]
uint32x2x3_t test_vld3_u32(uint32_t const * a) {
return vld3_u32(a);
}
-// CHECK-LABEL: test_vld3_u64
-// CHECK: vld1.64
+// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_u64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
+// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint64x1x3_t [[TMP6]]
uint64x1x3_t test_vld3_u64(uint64_t const * a) {
return vld3_u64(a);
}
-// CHECK-LABEL: test_vld3_s8
-// CHECK: vld3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int8x8x3_t [[TMP5]]
int8x8x3_t test_vld3_s8(int8_t const * a) {
return vld3_s8(a);
}
-// CHECK-LABEL: test_vld3_s16
-// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x3_t [[TMP6]]
int16x4x3_t test_vld3_s16(int16_t const * a) {
return vld3_s16(a);
}
-// CHECK-LABEL: test_vld3_s32
-// CHECK: vld3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
+// CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x3_t [[TMP6]]
int32x2x3_t test_vld3_s32(int32_t const * a) {
return vld3_s32(a);
}
-// CHECK-LABEL: test_vld3_s64
-// CHECK: vld1.64
+// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_s64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
+// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int64x1x3_t [[TMP6]]
int64x1x3_t test_vld3_s64(int64_t const * a) {
return vld3_s64(a);
}
-// CHECK-LABEL: test_vld3_f16
-// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float16x4x3_t [[TMP6]]
float16x4x3_t test_vld3_f16(float16_t const * a) {
return vld3_f16(a);
}
-// CHECK-LABEL: test_vld3_f32
-// CHECK: vld3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
+// CHECK: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0v2f32(<2 x float>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
+// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x3_t [[TMP6]]
float32x2x3_t test_vld3_f32(float32_t const * a) {
return vld3_f32(a);
}
-// CHECK-LABEL: test_vld3_p8
-// CHECK: vld3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly8x8x3_t [[TMP5]]
poly8x8x3_t test_vld3_p8(poly8_t const * a) {
return vld3_p8(a);
}
-// CHECK-LABEL: test_vld3_p16
-// CHECK: vld3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x3_t [[TMP6]]
poly16x4x3_t test_vld3_p16(poly16_t const * a) {
return vld3_p16(a);
}
-// CHECK-LABEL: test_vld3_dup_u8
-// CHECK: vld3.8 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_dup_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
+// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint8x8x3_t [[TMP4]]
uint8x8x3_t test_vld3_dup_u8(uint8_t const * a) {
return vld3_dup_u8(a);
}
-// CHECK-LABEL: test_vld3_dup_u16
-// CHECK: vld3.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_dup_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x3_t [[TMP6]]
uint16x4x3_t test_vld3_dup_u16(uint16_t const * a) {
return vld3_dup_u16(a);
}
-// CHECK-LABEL: test_vld3_dup_u32
-// CHECK: vld3.32 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_dup_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+// CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x3_t [[TMP6]]
uint32x2x3_t test_vld3_dup_u32(uint32_t const * a) {
return vld3_dup_u32(a);
}
-// CHECK-LABEL: test_vld3_dup_u64
-// CHECK: vld1.64
+// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_dup_u64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint64x1x3_t [[TMP6]]
uint64x1x3_t test_vld3_dup_u64(uint64_t const * a) {
return vld3_dup_u64(a);
}
-// CHECK-LABEL: test_vld3_dup_s8
-// CHECK: vld3.8 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_dup_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
+// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int8x8x3_t [[TMP4]]
int8x8x3_t test_vld3_dup_s8(int8_t const * a) {
return vld3_dup_s8(a);
}
-// CHECK-LABEL: test_vld3_dup_s16
-// CHECK: vld3.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_dup_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x3_t [[TMP6]]
int16x4x3_t test_vld3_dup_s16(int16_t const * a) {
return vld3_dup_s16(a);
}
-// CHECK-LABEL: test_vld3_dup_s32
-// CHECK: vld3.32 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_dup_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+// CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x3_t [[TMP6]]
int32x2x3_t test_vld3_dup_s32(int32_t const * a) {
return vld3_dup_s32(a);
}
-// CHECK-LABEL: test_vld3_dup_s64
-// CHECK: vld1.64
+// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_dup_s64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int64x1x3_t [[TMP6]]
int64x1x3_t test_vld3_dup_s64(int64_t const * a) {
return vld3_dup_s64(a);
}
-// CHECK-LABEL: test_vld3_dup_f16
-// CHECK: vld3.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_dup_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float16x4x3_t [[TMP6]]
float16x4x3_t test_vld3_dup_f16(float16_t const * a) {
return vld3_dup_f16(a);
}
-// CHECK-LABEL: test_vld3_dup_f32
-// CHECK: vld3.32 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_dup_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
+// CHECK: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
+// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x3_t [[TMP6]]
float32x2x3_t test_vld3_dup_f32(float32_t const * a) {
return vld3_dup_f32(a);
}
-// CHECK-LABEL: test_vld3_dup_p8
-// CHECK: vld3.8 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_dup_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
+// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly8x8x3_t [[TMP4]]
poly8x8x3_t test_vld3_dup_p8(poly8_t const * a) {
return vld3_dup_p8(a);
}
-// CHECK-LABEL: test_vld3_dup_p16
-// CHECK: vld3.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_dup_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x3_t [[TMP6]]
poly16x4x3_t test_vld3_dup_p16(poly16_t const * a) {
return vld3_dup_p16(a);
}
-// CHECK-LABEL: test_vld3q_lane_u16
-// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}
+// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
+// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, i64 7, i8* [[TMP3]])
+// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]3
+// CHECK: [[TMP14:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: [[TMP16:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint16x8x3_t [[TMP1]]6
uint16x8x3_t test_vld3q_lane_u16(uint16_t const * a, uint16x8x3_t b) {
return vld3q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: test_vld3q_lane_u32
-// CHECK: vld3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}
+// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_lane_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
+// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i8(<4 x i32> [[TMP1]]0, <4 x i32> [[TMP1]]1, <4 x i32> [[TMP1]]2, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP1]]3
+// CHECK: [[TMP14:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: [[TMP16:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint32x4x3_t [[TMP1]]6
uint32x4x3_t test_vld3q_lane_u32(uint32_t const * a, uint32x4x3_t b) {
return vld3q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: test_vld3q_lane_s16
-// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}
+// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
+// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, i64 7, i8* [[TMP3]])
+// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]3
+// CHECK: [[TMP14:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP15:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: [[TMP16:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int16x8x3_t [[TMP1]]6
int16x8x3_t test_vld3q_lane_s16(int16_t const * a, int16x8x3_t b) {
return vld3q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: test_vld3q_lane_s32
-// CHECK: vld3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}
+// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
+// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i8(<4 x i32> [[TMP1]]0, <4 x i32> [[TMP1]]1, <4 x i32> [[TMP1]]2, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP1]]3
+// CHECK: [[TMP14:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP15:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: [[TMP16:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int32x4x3_t [[TMP1]]6
int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) {
return vld3q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: test_vld3q_lane_f16
-// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}
+// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
+// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, i64 7, i8* [[TMP3]])
+// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]3
+// CHECK: [[TMP14:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP15:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: [[TMP16:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float16x8x3_t [[TMP1]]6
float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) {
return vld3q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: test_vld3q_lane_f32
-// CHECK: vld3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}
+// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
+// CHECK: [[VLD3_LANE:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0i8(<4 x float> [[TMP1]]0, <4 x float> [[TMP1]]1, <4 x float> [[TMP1]]2, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x float>, <4 x float>, <4 x float> }*
+// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3_LANE]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP1]]3
+// CHECK: [[TMP14:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP15:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: [[TMP16:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float32x4x3_t [[TMP1]]6
float32x4x3_t test_vld3q_lane_f32(float32_t const * a, float32x4x3_t b) {
return vld3q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: test_vld3q_lane_p16
-// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}
+// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
+// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, i64 7, i8* [[TMP3]])
+// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]3
+// CHECK: [[TMP14:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: [[TMP16:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly16x8x3_t [[TMP1]]6
poly16x8x3_t test_vld3q_lane_p16(poly16_t const * a, poly16x8x3_t b) {
return vld3q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: test_vld3_lane_u8
-// CHECK: vld3.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a)
+// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]]
+// CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP9:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint8x8x3_t [[TMP9]]
uint8x8x3_t test_vld3_lane_u8(uint8_t const * a, uint8x8x3_t b) {
return vld3_lane_u8(a, b, 7);
}
-// CHECK-LABEL: test_vld3_lane_u16
-// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
+// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]3
+// CHECK: [[TMP14:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: [[TMP16:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x3_t [[TMP1]]6
uint16x4x3_t test_vld3_lane_u16(uint16_t const * a, uint16x4x3_t b) {
return vld3_lane_u16(a, b, 3);
}
-// CHECK-LABEL: test_vld3_lane_u32
-// CHECK: vld3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
+// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i8(<2 x i32> [[TMP1]]0, <2 x i32> [[TMP1]]1, <2 x i32> [[TMP1]]2, i64 1, i8* [[TMP3]])
+// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP1]]3
+// CHECK: [[TMP14:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: [[TMP16:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x3_t [[TMP1]]6
uint32x2x3_t test_vld3_lane_u32(uint32_t const * a, uint32x2x3_t b) {
return vld3_lane_u32(a, b, 1);
}
-// CHECK-LABEL: test_vld3_lane_s8
-// CHECK: vld3.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a)
+// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]]
+// CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP9:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int8x8x3_t [[TMP9]]
int8x8x3_t test_vld3_lane_s8(int8_t const * a, int8x8x3_t b) {
return vld3_lane_s8(a, b, 7);
}
-// CHECK-LABEL: test_vld3_lane_s16
-// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
+// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]3
+// CHECK: [[TMP14:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP15:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: [[TMP16:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x3_t [[TMP1]]6
int16x4x3_t test_vld3_lane_s16(int16_t const * a, int16x4x3_t b) {
return vld3_lane_s16(a, b, 3);
}
-// CHECK-LABEL: test_vld3_lane_s32
-// CHECK: vld3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
+// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i8(<2 x i32> [[TMP1]]0, <2 x i32> [[TMP1]]1, <2 x i32> [[TMP1]]2, i64 1, i8* [[TMP3]])
+// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP1]]3
+// CHECK: [[TMP14:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP15:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: [[TMP16:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x3_t [[TMP1]]6
int32x2x3_t test_vld3_lane_s32(int32_t const * a, int32x2x3_t b) {
return vld3_lane_s32(a, b, 1);
}
-// CHECK-LABEL: test_vld3_lane_f16
-// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
+// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]3
+// CHECK: [[TMP14:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP15:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: [[TMP16:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float16x4x3_t [[TMP1]]6
float16x4x3_t test_vld3_lane_f16(float16_t const * a, float16x4x3_t b) {
return vld3_lane_f16(a, b, 3);
}
-// CHECK-LABEL: test_vld3_lane_f32
-// CHECK: vld3.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
+// CHECK: [[VLD3_LANE:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0i8(<2 x float> [[TMP1]]0, <2 x float> [[TMP1]]1, <2 x float> [[TMP1]]2, i64 1, i8* [[TMP3]])
+// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x float>, <2 x float>, <2 x float> }*
+// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP1]]3
+// CHECK: [[TMP14:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP15:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: [[TMP16:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x3_t [[TMP1]]6
float32x2x3_t test_vld3_lane_f32(float32_t const * a, float32x2x3_t b) {
return vld3_lane_f32(a, b, 1);
}
-// CHECK-LABEL: test_vld3_lane_p8
-// CHECK: vld3.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a)
+// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]]
+// CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP9:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly8x8x3_t [[TMP9]]
poly8x8x3_t test_vld3_lane_p8(poly8_t const * a, poly8x8x3_t b) {
return vld3_lane_p8(a, b, 7);
}
-// CHECK-LABEL: test_vld3_lane_p16
-// CHECK: vld3.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
+// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]3
+// CHECK: [[TMP14:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: [[TMP16:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x3_t [[TMP1]]6
poly16x4x3_t test_vld3_lane_p16(poly16_t const * a, poly16x4x3_t b) {
return vld3_lane_p16(a, b, 3);
}
-// CHECK-LABEL: test_vld4q_u8
-// CHECK: vld4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint8x16x4_t [[TMP5]]
uint8x16x4_t test_vld4q_u8(uint8_t const * a) {
return vld4q_u8(a);
}
-// CHECK-LABEL: test_vld4q_u16
-// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint16x8x4_t [[TMP6]]
uint16x8x4_t test_vld4q_u16(uint16_t const * a) {
return vld4q_u16(a);
}
-// CHECK-LABEL: test_vld4q_u32
-// CHECK: vld4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
+// CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint32x4x4_t [[TMP6]]
uint32x4x4_t test_vld4q_u32(uint32_t const * a) {
return vld4q_u32(a);
}
-// CHECK-LABEL: test_vld4q_s8
-// CHECK: vld4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int8x16x4_t [[TMP5]]
int8x16x4_t test_vld4q_s8(int8_t const * a) {
return vld4q_s8(a);
}
-// CHECK-LABEL: test_vld4q_s16
-// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int16x8x4_t [[TMP6]]
int16x8x4_t test_vld4q_s16(int16_t const * a) {
return vld4q_s16(a);
}
-// CHECK-LABEL: test_vld4q_s32
-// CHECK: vld4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
+// CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int32x4x4_t [[TMP6]]
int32x4x4_t test_vld4q_s32(int32_t const * a) {
return vld4q_s32(a);
}
-// CHECK-LABEL: test_vld4q_f16
-// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float16x8x4_t [[TMP6]]
float16x8x4_t test_vld4q_f16(float16_t const * a) {
return vld4q_f16(a);
}
-// CHECK-LABEL: test_vld4q_f32
-// CHECK: vld4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
+// CHECK: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0v4f32(<4 x float>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
+// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float32x4x4_t [[TMP6]]
float32x4x4_t test_vld4q_f32(float32_t const * a) {
return vld4q_f32(a);
}
-// CHECK-LABEL: test_vld4q_p8
-// CHECK: vld4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly8x16x4_t [[TMP5]]
poly8x16x4_t test_vld4q_p8(poly8_t const * a) {
return vld4q_p8(a);
}
-// CHECK-LABEL: test_vld4q_p16
-// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
+// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly16x8x4_t [[TMP6]]
poly16x8x4_t test_vld4q_p16(poly16_t const * a) {
return vld4q_p16(a);
}
-// CHECK-LABEL: test_vld4_u8
-// CHECK: vld4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint8x8x4_t [[TMP5]]
uint8x8x4_t test_vld4_u8(uint8_t const * a) {
return vld4_u8(a);
}
-// CHECK-LABEL: test_vld4_u16
-// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x4_t [[TMP6]]
uint16x4x4_t test_vld4_u16(uint16_t const * a) {
return vld4_u16(a);
}
-// CHECK-LABEL: test_vld4_u32
-// CHECK: vld4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
+// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x4_t [[TMP6]]
uint32x2x4_t test_vld4_u32(uint32_t const * a) {
return vld4_u32(a);
}
-// CHECK-LABEL: test_vld4_u64
-// CHECK: vld1.64
+// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_u64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
+// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint64x1x4_t [[TMP6]]
uint64x1x4_t test_vld4_u64(uint64_t const * a) {
return vld4_u64(a);
}
-// CHECK-LABEL: test_vld4_s8
-// CHECK: vld4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int8x8x4_t [[TMP5]]
int8x8x4_t test_vld4_s8(int8_t const * a) {
return vld4_s8(a);
}
-// CHECK-LABEL: test_vld4_s16
-// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x4_t [[TMP6]]
int16x4x4_t test_vld4_s16(int16_t const * a) {
return vld4_s16(a);
}
-// CHECK-LABEL: test_vld4_s32
-// CHECK: vld4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
+// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x4_t [[TMP6]]
int32x2x4_t test_vld4_s32(int32_t const * a) {
return vld4_s32(a);
}
-// CHECK-LABEL: test_vld4_s64
-// CHECK: vld1.64
+// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_s64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
+// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int64x1x4_t [[TMP6]]
int64x1x4_t test_vld4_s64(int64_t const * a) {
return vld4_s64(a);
}
-// CHECK-LABEL: test_vld4_f16
-// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float16x4x4_t [[TMP6]]
float16x4x4_t test_vld4_f16(float16_t const * a) {
return vld4_f16(a);
}
-// CHECK-LABEL: test_vld4_f32
-// CHECK: vld4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
+// CHECK: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0v2f32(<2 x float>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
+// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x4_t [[TMP6]]
float32x2x4_t test_vld4_f32(float32_t const * a) {
return vld4_f32(a);
}
-// CHECK-LABEL: test_vld4_p8
-// CHECK: vld4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly8x8x4_t [[TMP5]]
poly8x8x4_t test_vld4_p8(poly8_t const * a) {
return vld4_p8(a);
}
-// CHECK-LABEL: test_vld4_p16
-// CHECK: vld4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x4_t [[TMP6]]
poly16x4x4_t test_vld4_p16(poly16_t const * a) {
return vld4_p16(a);
}
-// CHECK-LABEL: test_vld4_dup_u8
-// CHECK: vld4.8 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_dup_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
+// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint8x8x4_t [[TMP4]]
uint8x8x4_t test_vld4_dup_u8(uint8_t const * a) {
return vld4_dup_u8(a);
}
-// CHECK-LABEL: test_vld4_dup_u16
-// CHECK: vld4.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_dup_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x4_t [[TMP6]]
uint16x4x4_t test_vld4_dup_u16(uint16_t const * a) {
return vld4_dup_u16(a);
}
-// CHECK-LABEL: test_vld4_dup_u32
-// CHECK: vld4.32 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_dup_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x4_t [[TMP6]]
uint32x2x4_t test_vld4_dup_u32(uint32_t const * a) {
return vld4_dup_u32(a);
}
-// CHECK-LABEL: test_vld4_dup_u64
-// CHECK: vld1.64
+// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_dup_u64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint64x1x4_t [[TMP6]]
uint64x1x4_t test_vld4_dup_u64(uint64_t const * a) {
return vld4_dup_u64(a);
}
-// CHECK-LABEL: test_vld4_dup_s8
-// CHECK: vld4.8 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_dup_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
+// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int8x8x4_t [[TMP4]]
int8x8x4_t test_vld4_dup_s8(int8_t const * a) {
return vld4_dup_s8(a);
}
-// CHECK-LABEL: test_vld4_dup_s16
-// CHECK: vld4.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_dup_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x4_t [[TMP6]]
int16x4x4_t test_vld4_dup_s16(int16_t const * a) {
return vld4_dup_s16(a);
}
-// CHECK-LABEL: test_vld4_dup_s32
-// CHECK: vld4.32 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_dup_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x4_t [[TMP6]]
int32x2x4_t test_vld4_dup_s32(int32_t const * a) {
return vld4_dup_s32(a);
}
-// CHECK-LABEL: test_vld4_dup_s64
-// CHECK: vld1.64
+// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_dup_s64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int64x1x4_t [[TMP6]]
int64x1x4_t test_vld4_dup_s64(int64_t const * a) {
return vld4_dup_s64(a);
}
-// CHECK-LABEL: test_vld4_dup_f16
-// CHECK: vld4.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_dup_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float16x4x4_t [[TMP6]]
float16x4x4_t test_vld4_dup_f16(float16_t const * a) {
return vld4_dup_f16(a);
}
-// CHECK-LABEL: test_vld4_dup_f32
-// CHECK: vld4.32 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_dup_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
+// CHECK: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
+// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x4_t [[TMP6]]
float32x2x4_t test_vld4_dup_f32(float32_t const * a) {
return vld4_dup_f32(a);
}
-// CHECK-LABEL: test_vld4_dup_p8
-// CHECK: vld4.8 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_dup_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
+// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly8x8x4_t [[TMP4]]
poly8x8x4_t test_vld4_dup_p8(poly8_t const * a) {
return vld4_dup_p8(a);
}
-// CHECK-LABEL: test_vld4_dup_p16
-// CHECK: vld4.16 {d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[], d{{[0-9]+}}[]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_dup_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x4_t [[TMP6]]
poly16x4x4_t test_vld4_dup_p16(poly16_t const * a) {
return vld4_dup_p16(a);
}
-// CHECK-LABEL: test_vld4q_lane_u16
-// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}
+// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP1]]0 to <16 x i8>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
+// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <8 x i16>
+// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, <8 x i16> [[TMP1]]5, i64 7, i8* [[TMP3]])
+// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]6
+// CHECK: [[TMP17:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false)
+// CHECK: [[TMP19:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint16x8x4_t [[TMP1]]9
uint16x8x4_t test_vld4q_lane_u16(uint16_t const * a, uint16x8x4_t b) {
return vld4q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: test_vld4q_lane_u32
-// CHECK: vld4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}
+// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP1]]0 to <16 x i8>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
+// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <4 x i32>
+// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i8(<4 x i32> [[TMP1]]2, <4 x i32> [[TMP1]]3, <4 x i32> [[TMP1]]4, <4 x i32> [[TMP1]]5, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP1]]6
+// CHECK: [[TMP17:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false)
+// CHECK: [[TMP19:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint32x4x4_t [[TMP1]]9
uint32x4x4_t test_vld4q_lane_u32(uint32_t const * a, uint32x4x4_t b) {
return vld4q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: test_vld4q_lane_s16
-// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}
+// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP1]]0 to <16 x i8>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
+// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <8 x i16>
+// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, <8 x i16> [[TMP1]]5, i64 7, i8* [[TMP3]])
+// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]6
+// CHECK: [[TMP17:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP18:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false)
+// CHECK: [[TMP19:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int16x8x4_t [[TMP1]]9
int16x8x4_t test_vld4q_lane_s16(int16_t const * a, int16x8x4_t b) {
return vld4q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: test_vld4q_lane_s32
-// CHECK: vld4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}
+// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP1]]0 to <16 x i8>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
+// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <4 x i32>
+// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i8(<4 x i32> [[TMP1]]2, <4 x i32> [[TMP1]]3, <4 x i32> [[TMP1]]4, <4 x i32> [[TMP1]]5, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP1]]6
+// CHECK: [[TMP17:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP18:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false)
+// CHECK: [[TMP19:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int32x4x4_t [[TMP1]]9
int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) {
return vld4q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: test_vld4q_lane_f16
-// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}
+// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP1]]0 to <16 x i8>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
+// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <8 x i16>
+// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, <8 x i16> [[TMP1]]5, i64 7, i8* [[TMP3]])
+// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]6
+// CHECK: [[TMP17:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP18:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false)
+// CHECK: [[TMP19:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float16x8x4_t [[TMP1]]9
float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) {
return vld4q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: test_vld4q_lane_f32
-// CHECK: vld4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}
+// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP1]]0 to <16 x i8>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
+// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <4 x float>
+// CHECK: [[VLD4_LANE:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0i8(<4 x float> [[TMP1]]2, <4 x float> [[TMP1]]3, <4 x float> [[TMP1]]4, <4 x float> [[TMP1]]5, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
+// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4_LANE]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP1]]6
+// CHECK: [[TMP17:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP18:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false)
+// CHECK: [[TMP19:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float32x4x4_t [[TMP1]]9
float32x4x4_t test_vld4q_lane_f32(float32_t const * a, float32x4x4_t b) {
return vld4q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: test_vld4q_lane_p16
-// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}
+// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP1]]0 to <16 x i8>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
+// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <8 x i16>
+// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, <8 x i16> [[TMP1]]5, i64 7, i8* [[TMP3]])
+// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]6
+// CHECK: [[TMP17:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false)
+// CHECK: [[TMP19:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly16x8x4_t [[TMP1]]9
poly16x8x4_t test_vld4q_lane_p16(poly16_t const * a, poly16x8x4_t b) {
return vld4q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: test_vld4_lane_u8
-// CHECK: vld4.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8
+// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, i8* %a)
+// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
+// CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP10:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint8x8x4_t [[TMP1]]0
uint8x8x4_t test_vld4_lane_u8(uint8_t const * a, uint8x8x4_t b) {
return vld4_lane_u8(a, b, 7);
}
-// CHECK-LABEL: test_vld4_lane_u16
-// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP1]]0 to <8 x i8>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <4 x i16>
+// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, <4 x i16> [[TMP1]]5, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]6
+// CHECK: [[TMP17:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: [[TMP19:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x4_t [[TMP1]]9
uint16x4x4_t test_vld4_lane_u16(uint16_t const * a, uint16x4x4_t b) {
return vld4_lane_u16(a, b, 3);
}
-// CHECK-LABEL: test_vld4_lane_u32
-// CHECK: vld4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP1]]0 to <8 x i8>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <2 x i32>
+// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i8(<2 x i32> [[TMP1]]2, <2 x i32> [[TMP1]]3, <2 x i32> [[TMP1]]4, <2 x i32> [[TMP1]]5, i64 1, i8* [[TMP3]])
+// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP1]]6
+// CHECK: [[TMP17:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: [[TMP19:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x4_t [[TMP1]]9
uint32x2x4_t test_vld4_lane_u32(uint32_t const * a, uint32x2x4_t b) {
return vld4_lane_u32(a, b, 1);
}
-// CHECK-LABEL: test_vld4_lane_s8
-// CHECK: vld4.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8
+// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, i8* %a)
+// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
+// CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP10:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int8x8x4_t [[TMP1]]0
int8x8x4_t test_vld4_lane_s8(int8_t const * a, int8x8x4_t b) {
return vld4_lane_s8(a, b, 7);
}
-// CHECK-LABEL: test_vld4_lane_s16
-// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP1]]0 to <8 x i8>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <4 x i16>
+// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, <4 x i16> [[TMP1]]5, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]6
+// CHECK: [[TMP17:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP18:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: [[TMP19:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x4_t [[TMP1]]9
int16x4x4_t test_vld4_lane_s16(int16_t const * a, int16x4x4_t b) {
return vld4_lane_s16(a, b, 3);
}
-// CHECK-LABEL: test_vld4_lane_s32
-// CHECK: vld4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP1]]0 to <8 x i8>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <2 x i32>
+// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i8(<2 x i32> [[TMP1]]2, <2 x i32> [[TMP1]]3, <2 x i32> [[TMP1]]4, <2 x i32> [[TMP1]]5, i64 1, i8* [[TMP3]])
+// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP1]]6
+// CHECK: [[TMP17:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP18:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: [[TMP19:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x4_t [[TMP1]]9
int32x2x4_t test_vld4_lane_s32(int32_t const * a, int32x2x4_t b) {
return vld4_lane_s32(a, b, 1);
}
-// CHECK-LABEL: test_vld4_lane_f16
-// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP1]]0 to <8 x i8>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <4 x i16>
+// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, <4 x i16> [[TMP1]]5, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]6
+// CHECK: [[TMP17:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP18:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: [[TMP19:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float16x4x4_t [[TMP1]]9
float16x4x4_t test_vld4_lane_f16(float16_t const * a, float16x4x4_t b) {
return vld4_lane_f16(a, b, 3);
}
-// CHECK-LABEL: test_vld4_lane_f32
-// CHECK: vld4.32 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP1]]0 to <8 x i8>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <2 x float>
+// CHECK: [[VLD4_LANE:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0i8(<2 x float> [[TMP1]]2, <2 x float> [[TMP1]]3, <2 x float> [[TMP1]]4, <2 x float> [[TMP1]]5, i64 1, i8* [[TMP3]])
+// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
+// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP1]]6
+// CHECK: [[TMP17:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP18:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: [[TMP19:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x4_t [[TMP1]]9
float32x2x4_t test_vld4_lane_f32(float32_t const * a, float32x2x4_t b) {
return vld4_lane_f32(a, b, 1);
}
-// CHECK-LABEL: test_vld4_lane_p8
-// CHECK: vld4.8 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8
+// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, i8* %a)
+// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
+// CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP10:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly8x8x4_t [[TMP1]]0
poly8x8x4_t test_vld4_lane_p8(poly8_t const * a, poly8x8x4_t b) {
return vld4_lane_p8(a, b, 7);
}
-// CHECK-LABEL: test_vld4_lane_p16
-// CHECK: vld4.16 {d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}], d{{[0-9]+}}[{{[0-9]+}}]}, [r{{[0-9]+}}]
+// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP1]]0 to <8 x i8>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <4 x i16>
+// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, <4 x i16> [[TMP1]]5, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]6
+// CHECK: [[TMP17:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: [[TMP19:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x4_t [[TMP1]]9
poly16x4x4_t test_vld4_lane_p16(poly16_t const * a, poly16x4x4_t b) {
return vld4_lane_p16(a, b, 3);
}
-// CHECK-LABEL: test_vmax_s8
-// CHECK: vmax.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VMAX_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
return vmax_s8(a, b);
}
-// CHECK-LABEL: test_vmax_s16
-// CHECK: vmax.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4
+// CHECK: ret <4 x i16> [[VMAX2_I]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
return vmax_s16(a, b);
}
-// CHECK-LABEL: test_vmax_s32
-// CHECK: vmax.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4
+// CHECK: ret <2 x i32> [[VMAX2_I]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
return vmax_s32(a, b);
}
-// CHECK-LABEL: test_vmax_u8
-// CHECK: vmax.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VMAX_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
return vmax_u8(a, b);
}
-// CHECK-LABEL: test_vmax_u16
-// CHECK: vmax.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4
+// CHECK: ret <4 x i16> [[VMAX2_I]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
return vmax_u16(a, b);
}
-// CHECK-LABEL: test_vmax_u32
-// CHECK: vmax.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4
+// CHECK: ret <2 x i32> [[VMAX2_I]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
return vmax_u32(a, b);
}
-// CHECK-LABEL: test_vmax_f32
-// CHECK: vmax.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vmax_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> [[VMAX_I]], <2 x float> [[VMAX1_I]]) #4
+// CHECK: ret <2 x float> [[VMAX2_I]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
return vmax_f32(a, b);
}
-// CHECK-LABEL: test_vmaxq_s8
-// CHECK: vmax.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VMAX_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
return vmaxq_s8(a, b);
}
-// CHECK-LABEL: test_vmaxq_s16
-// CHECK: vmax.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4
+// CHECK: ret <8 x i16> [[VMAX2_I]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
return vmaxq_s16(a, b);
}
-// CHECK-LABEL: test_vmaxq_s32
-// CHECK: vmax.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4
+// CHECK: ret <4 x i32> [[VMAX2_I]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
return vmaxq_s32(a, b);
}
-// CHECK-LABEL: test_vmaxq_u8
-// CHECK: vmax.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VMAX_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
return vmaxq_u8(a, b);
}
-// CHECK-LABEL: test_vmaxq_u16
-// CHECK: vmax.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4
+// CHECK: ret <8 x i16> [[VMAX2_I]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
return vmaxq_u16(a, b);
}
-// CHECK-LABEL: test_vmaxq_u32
-// CHECK: vmax.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4
+// CHECK: ret <4 x i32> [[VMAX2_I]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
return vmaxq_u32(a, b);
}
-// CHECK-LABEL: test_vmaxq_f32
-// CHECK: vmax.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> [[VMAX_I]], <4 x float> [[VMAX1_I]]) #4
+// CHECK: ret <4 x float> [[VMAX2_I]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
return vmaxq_f32(a, b);
}
-// CHECK-LABEL: test_vmin_s8
-// CHECK: vmin.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VMIN_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
return vmin_s8(a, b);
}
-// CHECK-LABEL: test_vmin_s16
-// CHECK: vmin.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4
+// CHECK: ret <4 x i16> [[VMIN2_I]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
return vmin_s16(a, b);
}
-// CHECK-LABEL: test_vmin_s32
-// CHECK: vmin.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4
+// CHECK: ret <2 x i32> [[VMIN2_I]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
return vmin_s32(a, b);
}
-// CHECK-LABEL: test_vmin_u8
-// CHECK: vmin.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VMIN_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
return vmin_u8(a, b);
}
-// CHECK-LABEL: test_vmin_u16
-// CHECK: vmin.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4
+// CHECK: ret <4 x i16> [[VMIN2_I]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
return vmin_u16(a, b);
}
-// CHECK-LABEL: test_vmin_u32
-// CHECK: vmin.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4
+// CHECK: ret <2 x i32> [[VMIN2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
return vmin_u32(a, b);
}
-// CHECK-LABEL: test_vmin_f32
-// CHECK: vmin.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vmin_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> [[VMIN_I]], <2 x float> [[VMIN1_I]]) #4
+// CHECK: ret <2 x float> [[VMIN2_I]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
return vmin_f32(a, b);
}
-// CHECK-LABEL: test_vminq_s8
-// CHECK: vmin.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VMIN_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
return vminq_s8(a, b);
}
-// CHECK-LABEL: test_vminq_s16
-// CHECK: vmin.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4
+// CHECK: ret <8 x i16> [[VMIN2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
return vminq_s16(a, b);
}
-// CHECK-LABEL: test_vminq_s32
-// CHECK: vmin.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vminq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4
+// CHECK: ret <4 x i32> [[VMIN2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
return vminq_s32(a, b);
}
-// CHECK-LABEL: test_vminq_u8
-// CHECK: vmin.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VMIN_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
return vminq_u8(a, b);
}
-// CHECK-LABEL: test_vminq_u16
-// CHECK: vmin.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vminq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4
+// CHECK: ret <8 x i16> [[VMIN2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
return vminq_u16(a, b);
}
-// CHECK-LABEL: test_vminq_u32
-// CHECK: vmin.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4
+// CHECK: ret <4 x i32> [[VMIN2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
return vminq_u32(a, b);
}
-// CHECK-LABEL: test_vminq_f32
-// CHECK: vmin.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vminq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> [[VMIN_I]], <4 x float> [[VMIN1_I]]) #4
+// CHECK: ret <4 x float> [[VMIN2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
return vminq_f32(a, b);
}
-// CHECK-LABEL: test_vmla_s8
-// CHECK: vmla.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmla_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
+// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
+// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
return vmla_s8(a, b, c);
}
-// CHECK-LABEL: test_vmla_s16
-// CHECK: vmla.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmla_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
+// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
+// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
return vmla_s16(a, b, c);
}
-// CHECK-LABEL: test_vmla_s32
-// CHECK: vmla.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmla_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
+// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
+// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
return vmla_s32(a, b, c);
}
-// CHECK-LABEL: test_vmla_f32
-// CHECK-SWIFT: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK-SWIFT: vadd.f32
-// CHECK-A57: vmla.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vmla_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
+// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
+// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
return vmla_f32(a, b, c);
}
-// CHECK-LABEL: test_vmla_u8
-// CHECK: vmla.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmla_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
+// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
+// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
return vmla_u8(a, b, c);
}
-// CHECK-LABEL: test_vmla_u16
-// CHECK: vmla.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmla_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
+// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
+// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vmla_u16(a, b, c);
}
-// CHECK-LABEL: test_vmla_u32
-// CHECK: vmla.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmla_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
+// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
+// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vmla_u32(a, b, c);
}
-// CHECK-LABEL: test_vmlaq_s8
-// CHECK: vmla.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmlaq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
+// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
+// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
return vmlaq_s8(a, b, c);
}
-// CHECK-LABEL: test_vmlaq_s16
-// CHECK: vmla.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmlaq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
return vmlaq_s16(a, b, c);
}
-// CHECK-LABEL: test_vmlaq_s32
-// CHECK: vmla.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlaq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
return vmlaq_s32(a, b, c);
}
-// CHECK-LABEL: test_vmlaq_f32
-// CHECK-SWIFT: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK-SWIFT: vadd.f32
-// CHECK-A57: vmla.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vmlaq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
+// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
+// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
return vmlaq_f32(a, b, c);
}
-// CHECK-LABEL: test_vmlaq_u8
-// CHECK: vmla.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmlaq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
+// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
+// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
return vmlaq_u8(a, b, c);
}
-// CHECK-LABEL: test_vmlaq_u16
-// CHECK: vmla.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmlaq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
return vmlaq_u16(a, b, c);
}
-// CHECK-LABEL: test_vmlaq_u32
-// CHECK: vmla.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlaq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
return vmlaq_u32(a, b, c);
}
-// CHECK-LABEL: test_vmlal_s8
-// CHECK: vmlal.s8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
return vmlal_s8(a, b, c);
}
-// CHECK-LABEL: test_vmlal_s16
-// CHECK: vmlal.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vmlal_s16(a, b, c);
}
-// CHECK-LABEL: test_vmlal_s32
-// CHECK: vmlal.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vmlal_s32(a, b, c);
}
-// CHECK-LABEL: test_vmlal_u8
-// CHECK: vmlal.u8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
return vmlal_u8(a, b, c);
}
-// CHECK-LABEL: test_vmlal_u16
-// CHECK: vmlal.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vmlal_u16(a, b, c);
}
-// CHECK-LABEL: test_vmlal_u32
-// CHECK: vmlal.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vmlal_u32(a, b, c);
}
-// CHECK-LABEL: test_vmlal_lane_s16
-// CHECK: vmlal.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
+// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
+// CHECK: ret <4 x i32> [[ADD]]
int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vmlal_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: test_vmlal_lane_s32
-// CHECK: vmlal.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
+// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
+// CHECK: ret <2 x i64> [[ADD]]
int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vmlal_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmlal_lane_u16
-// CHECK: vmlal.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
+// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
+// CHECK: ret <4 x i32> [[ADD]]
uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vmlal_lane_u16(a, b, c, 3);
}
-// CHECK-LABEL: test_vmlal_lane_u32
-// CHECK: vmlal.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
+// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
+// CHECK: ret <2 x i64> [[ADD]]
uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vmlal_lane_u32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmlal_n_s16
-// CHECK: vmlal.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
return vmlal_n_s16(a, b, c);
}
-// CHECK-LABEL: test_vmlal_n_s32
-// CHECK: vmlal.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
return vmlal_n_s32(a, b, c);
}
-// CHECK-LABEL: test_vmlal_n_u16
-// CHECK: vmlal.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlal_n_u16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
return vmlal_n_u16(a, b, c);
}
-// CHECK-LABEL: test_vmlal_n_u32
-// CHECK: vmlal.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmlal_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
return vmlal_n_u32(a, b, c);
}
-// CHECK-LABEL: test_vmla_lane_s16
-// CHECK: vmla.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
+// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
+// CHECK: ret <4 x i16> [[ADD]]
int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
return vmla_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: test_vmla_lane_s32
-// CHECK: vmla.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
+// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
+// CHECK: ret <2 x i32> [[ADD]]
int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
return vmla_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmla_lane_u16
-// CHECK: vmla.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i16> @test_vmla_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
+// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
+// CHECK: ret <4 x i16> [[ADD]]
uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vmla_lane_u16(a, b, c, 3);
}
-// CHECK-LABEL: test_vmla_lane_u32
-// CHECK: vmla.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i32> @test_vmla_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
+// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
+// CHECK: ret <2 x i32> [[ADD]]
uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vmla_lane_u32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmla_lane_f32
-// CHECK-SWIFT: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
-// CHECK-SWIFT: vadd.f32
-// CHECK-A57: vmla.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <2 x i32> <i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
+// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
+// CHECK: ret <2 x float> [[ADD]]
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
return vmla_lane_f32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmlaq_lane_s16
-// CHECK: vmla.i16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
+// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
+// CHECK: ret <8 x i16> [[ADD]]
int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
return vmlaq_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: test_vmlaq_lane_s32
-// CHECK: vmla.i32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
+// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
+// CHECK: ret <4 x i32> [[ADD]]
int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
return vmlaq_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmlaq_lane_u16
-// CHECK: vmla.i16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
+// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
+// CHECK: ret <8 x i16> [[ADD]]
uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
return vmlaq_lane_u16(a, b, c, 3);
}
-// CHECK-LABEL: test_vmlaq_lane_u32
-// CHECK: vmla.i32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
+// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
+// CHECK: ret <4 x i32> [[ADD]]
uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
return vmlaq_lane_u32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmlaq_lane_f32
-// CHECK-SWIFT: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
-// CHECK-SWIFT: vadd.f32
-// CHECK-A57: vmla.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
+// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
+// CHECK: ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
return vmlaq_lane_f32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmla_n_s16
-// CHECK: vmla.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmla_n_s16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
+// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
+// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
return vmla_n_s16(a, b, c);
}
-// CHECK-LABEL: test_vmla_n_s32
-// CHECK: vmla.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmla_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
+// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
+// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
return vmla_n_s32(a, b, c);
}
-// CHECK-LABEL: test_vmla_n_u16
-// CHECK: vmla.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmla_n_u16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
+// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
+// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
return vmla_n_u16(a, b, c);
}
-// CHECK-LABEL: test_vmla_n_u32
-// CHECK: vmla.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmla_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
+// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
+// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
return vmla_n_u32(a, b, c);
}
-// CHECK-LABEL: test_vmla_n_f32
-// CHECK-SWIFT: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK-SWIFT: vadd.f32
-// CHECK-A57: vmla.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vmla_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
+// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
+// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
+// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
return vmla_n_f32(a, b, c);
}
-// CHECK-LABEL: test_vmlaq_n_s16
-// CHECK: vmla.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
return vmlaq_n_s16(a, b, c);
}
-// CHECK-LABEL: test_vmlaq_n_s32
-// CHECK: vmla.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
return vmlaq_n_s32(a, b, c);
}
-// CHECK-LABEL: test_vmlaq_n_u16
-// CHECK: vmla.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
return vmlaq_n_u16(a, b, c);
}
-// CHECK-LABEL: test_vmlaq_n_u32
-// CHECK: vmla.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
return vmlaq_n_u32(a, b, c);
}
-// CHECK-LABEL: test_vmlaq_n_f32
-// CHECK-SWIFT: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[0]
-// CHECK-SWIFT: vadd.f32
-// CHECK-A57: vld1.32 {d{{[0-9]+}}[], d{{[0-9]+}}[]},
-// CHECK-A57: vmla.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
+// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
+// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
+// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
return vmlaq_n_f32(a, b, c);
}
-// CHECK-LABEL: test_vmls_s8
-// CHECK: vmls.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmls_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
+// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
+// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
return vmls_s8(a, b, c);
}
-// CHECK-LABEL: test_vmls_s16
-// CHECK: vmls.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmls_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
+// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
+// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
return vmls_s16(a, b, c);
}
-// CHECK-LABEL: test_vmls_s32
-// CHECK: vmls.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmls_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
+// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
+// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
return vmls_s32(a, b, c);
}
-// CHECK-LABEL: test_vmls_f32
-// CHECK-SWIFT: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK-SWIFT: vsub.f32
-// CHECK-A57: vmls.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vmls_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
+// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
+// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
return vmls_f32(a, b, c);
}
-// CHECK-LABEL: test_vmls_u8
-// CHECK: vmls.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmls_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
+// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
+// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
return vmls_u8(a, b, c);
}
-// CHECK-LABEL: test_vmls_u16
-// CHECK: vmls.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmls_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
+// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
+// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vmls_u16(a, b, c);
}
-// CHECK-LABEL: test_vmls_u32
-// CHECK: vmls.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmls_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
+// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
+// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vmls_u32(a, b, c);
}
-// CHECK-LABEL: test_vmlsq_s8
-// CHECK: vmls.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmlsq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
+// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
+// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
return vmlsq_s8(a, b, c);
}
-// CHECK-LABEL: test_vmlsq_s16
-// CHECK: vmls.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmlsq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
+// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
return vmlsq_s16(a, b, c);
}
-// CHECK-LABEL: test_vmlsq_s32
-// CHECK: vmls.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlsq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
return vmlsq_s32(a, b, c);
}
-// CHECK-LABEL: test_vmlsq_f32
-// CHECK-SWIFT: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK-SWIFT: vsub.f32
-// CHECK-A57: vmls.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vmlsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
+// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
+// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
return vmlsq_f32(a, b, c);
}
-// CHECK-LABEL: test_vmlsq_u8
-// CHECK: vmls.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmlsq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
+// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
+// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
return vmlsq_u8(a, b, c);
}
-// CHECK-LABEL: test_vmlsq_u16
-// CHECK: vmls.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmlsq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
+// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
return vmlsq_u16(a, b, c);
}
-// CHECK-LABEL: test_vmlsq_u32
-// CHECK: vmls.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlsq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
return vmlsq_u32(a, b, c);
}
-// CHECK-LABEL: test_vmlsl_s8
-// CHECK: vmlsl.s8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
+// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
return vmlsl_s8(a, b, c);
}
-// CHECK-LABEL: test_vmlsl_s16
-// CHECK: vmlsl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vmlsl_s16(a, b, c);
}
-// CHECK-LABEL: test_vmlsl_s32
-// CHECK: vmlsl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
+// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vmlsl_s32(a, b, c);
}
-// CHECK-LABEL: test_vmlsl_u8
-// CHECK: vmlsl.u8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
+// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
return vmlsl_u8(a, b, c);
}
-// CHECK-LABEL: test_vmlsl_u16
-// CHECK: vmlsl.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vmlsl_u16(a, b, c);
}
-// CHECK-LABEL: test_vmlsl_u32
-// CHECK: vmlsl.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
+// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vmlsl_u32(a, b, c);
}
-// CHECK-LABEL: test_vmlsl_lane_s16
-// CHECK: vmlsl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
+// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
+// CHECK: ret <4 x i32> [[SUB]]
int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vmlsl_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: test_vmlsl_lane_s32
-// CHECK: vmlsl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
+// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
+// CHECK: ret <2 x i64> [[SUB]]
int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vmlsl_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmlsl_lane_u16
-// CHECK: vmlsl.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
+// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
+// CHECK: ret <4 x i32> [[SUB]]
uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vmlsl_lane_u16(a, b, c, 3);
}
-// CHECK-LABEL: test_vmlsl_lane_u32
-// CHECK: vmlsl.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
+// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
+// CHECK: ret <2 x i64> [[SUB]]
uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vmlsl_lane_u32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmlsl_n_s16
-// CHECK: vmlsl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
return vmlsl_n_s16(a, b, c);
}
-// CHECK-LABEL: test_vmlsl_n_s32
-// CHECK: vmlsl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
+// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
return vmlsl_n_s32(a, b, c);
}
-// CHECK-LABEL: test_vmlsl_n_u16
-// CHECK: vmlsl.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_u16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
return vmlsl_n_u16(a, b, c);
}
-// CHECK-LABEL: test_vmlsl_n_u32
-// CHECK: vmlsl.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
+// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
return vmlsl_n_u32(a, b, c);
}
-// CHECK-LABEL: test_vmls_lane_s16
-// CHECK: vmls.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
+// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
+// CHECK: ret <4 x i16> [[SUB]]
int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
return vmls_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: test_vmls_lane_s32
-// CHECK: vmls.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
+// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
+// CHECK: ret <2 x i32> [[SUB]]
int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
return vmls_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmls_lane_u16
-// CHECK: vmls.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i16> @test_vmls_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
+// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
+// CHECK: ret <4 x i16> [[SUB]]
uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vmls_lane_u16(a, b, c, 3);
}
-// CHECK-LABEL: test_vmls_lane_u32
-// CHECK: vmls.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i32> @test_vmls_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
+// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
+// CHECK: ret <2 x i32> [[SUB]]
uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vmls_lane_u32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmls_lane_f32
-// CHECK-SWIFT: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
-// CHECK-SWIFT: vsub.f32
-// CHECK-A57: vmls.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <2 x i32> <i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
+// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
+// CHECK: ret <2 x float> [[SUB]]
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
return vmls_lane_f32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmlsq_lane_s16
-// CHECK: vmls.i16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
+// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
+// CHECK: ret <8 x i16> [[SUB]]
int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
return vmlsq_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: test_vmlsq_lane_s32
-// CHECK: vmls.i32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
+// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
+// CHECK: ret <4 x i32> [[SUB]]
int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
return vmlsq_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmlsq_lane_u16
-// CHECK: vmls.i16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
+// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
+// CHECK: ret <8 x i16> [[SUB]]
uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
return vmlsq_lane_u16(a, b, c, 3);
}
-// CHECK-LABEL: test_vmlsq_lane_u32
-// CHECK: vmls.i32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
+// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
+// CHECK: ret <4 x i32> [[SUB]]
uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
return vmlsq_lane_u32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmlsq_lane_f32
-// CHECK-SWIFT: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
-// CHECK-SWIFT: vsub.f32
-// CHECK-A57: vmls.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
+// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
+// CHECK: ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
return vmlsq_lane_f32(a, b, c, 1);
}
-// CHECK-LABEL: test_vmls_n_s16
-// CHECK: vmls.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmls_n_s16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
+// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
+// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
return vmls_n_s16(a, b, c);
}
-// CHECK-LABEL: test_vmls_n_s32
-// CHECK: vmls.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmls_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
+// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
+// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
return vmls_n_s32(a, b, c);
}
-// CHECK-LABEL: test_vmls_n_u16
-// CHECK: vmls.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmls_n_u16(<4 x i16> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
+// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
+// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
return vmls_n_u16(a, b, c);
}
-// CHECK-LABEL: test_vmls_n_u32
-// CHECK: vmls.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmls_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
+// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
+// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
return vmls_n_u32(a, b, c);
}
-// CHECK-LABEL: test_vmls_n_f32
-// CHECK-SWIFT: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK-SWIFT: vsub.f32
-// CHECK-A57: vmls.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vmls_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
+// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
+// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
+// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
return vmls_n_f32(a, b, c);
}
-// CHECK-LABEL: test_vmlsq_n_s16
-// CHECK: vmls.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
+// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
return vmlsq_n_s16(a, b, c);
}
-// CHECK-LABEL: test_vmlsq_n_s32
-// CHECK: vmls.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
return vmlsq_n_s32(a, b, c);
}
-// CHECK-LABEL: test_vmlsq_n_u16
-// CHECK: vmls.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
+// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
return vmlsq_n_u16(a, b, c);
}
-// CHECK-LABEL: test_vmlsq_n_u32
-// CHECK: vmls.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
return vmlsq_n_u32(a, b, c);
}
-// CHECK-LABEL: test_vmlsq_n_f32
-// CHECK-SWIFT: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[0]
-// CHECK-SWIFT: vsub.f32
-// CHECK-A57: vmls.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
+// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
+// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
+// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
return vmlsq_n_f32(a, b, c);
}
-// CHECK-LABEL: test_vmovl_s8
-// CHECK: vmovl.s8 q{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmovl_s8(<8 x i8> %a) #0 {
+// CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[VMOVL_I]]
int16x8_t test_vmovl_s8(int8x8_t a) {
return vmovl_s8(a);
}
-// CHECK-LABEL: test_vmovl_s16
-// CHECK: vmovl.s16 q{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmovl_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: ret <4 x i32> [[VMOVL_I]]
int32x4_t test_vmovl_s16(int16x4_t a) {
return vmovl_s16(a);
}
-// CHECK-LABEL: test_vmovl_s32
-// CHECK: vmovl.s32 q{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmovl_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: ret <2 x i64> [[VMOVL_I]]
int64x2_t test_vmovl_s32(int32x2_t a) {
return vmovl_s32(a);
}
-// CHECK-LABEL: test_vmovl_u8
-// CHECK: vmovl.u8 q{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmovl_u8(<8 x i8> %a) #0 {
+// CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[VMOVL_I]]
uint16x8_t test_vmovl_u8(uint8x8_t a) {
return vmovl_u8(a);
}
-// CHECK-LABEL: test_vmovl_u16
-// CHECK: vmovl.u16 q{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmovl_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: ret <4 x i32> [[VMOVL_I]]
uint32x4_t test_vmovl_u16(uint16x4_t a) {
return vmovl_u16(a);
}
-// CHECK-LABEL: test_vmovl_u32
-// CHECK: vmovl.u32 q{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmovl_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: ret <2 x i64> [[VMOVL_I]]
uint64x2_t test_vmovl_u32(uint32x2_t a) {
return vmovl_u32(a);
}
-// CHECK-LABEL: test_vmovn_s16
-// CHECK: vmovn.i16 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
+// CHECK: ret <8 x i8> [[VMOVN_I]]
int8x8_t test_vmovn_s16(int16x8_t a) {
return vmovn_s16(a);
}
-// CHECK-LABEL: test_vmovn_s32
-// CHECK: vmovn.i32 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
+// CHECK: ret <4 x i16> [[VMOVN_I]]
int16x4_t test_vmovn_s32(int32x4_t a) {
return vmovn_s32(a);
}
-// CHECK-LABEL: test_vmovn_s64
-// CHECK: vmovn.i64 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+// CHECK: ret <2 x i32> [[VMOVN_I]]
int32x2_t test_vmovn_s64(int64x2_t a) {
return vmovn_s64(a);
}
-// CHECK-LABEL: test_vmovn_u16
-// CHECK: vmovn.i16 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmovn_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
+// CHECK: ret <8 x i8> [[VMOVN_I]]
uint8x8_t test_vmovn_u16(uint16x8_t a) {
return vmovn_u16(a);
}
-// CHECK-LABEL: test_vmovn_u32
-// CHECK: vmovn.i32 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmovn_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
+// CHECK: ret <4 x i16> [[VMOVN_I]]
uint16x4_t test_vmovn_u32(uint32x4_t a) {
return vmovn_u32(a);
}
-// CHECK-LABEL: test_vmovn_u64
-// CHECK: vmovn.i64 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmovn_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+// CHECK: ret <2 x i32> [[VMOVN_I]]
uint32x2_t test_vmovn_u64(uint64x2_t a) {
return vmovn_u64(a);
}
-// CHECK-LABEL: test_vmov_n_u8
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmov_n_u8(i8 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
+// CHECK: ret <8 x i8> [[VECINIT7_I]]
uint8x8_t test_vmov_n_u8(uint8_t a) {
return vmov_n_u8(a);
}
-// CHECK-LABEL: test_vmov_n_u16
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmov_n_u16(i16 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
+// CHECK: ret <4 x i16> [[VECINIT3_I]]
uint16x4_t test_vmov_n_u16(uint16_t a) {
return vmov_n_u16(a);
}
-// CHECK-LABEL: test_vmov_n_u32
-// CHECK: mov {{r[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmov_n_u32(i32 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
+// CHECK: ret <2 x i32> [[VECINIT1_I]]
uint32x2_t test_vmov_n_u32(uint32_t a) {
return vmov_n_u32(a);
}
-// CHECK-LABEL: test_vmov_n_s8
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmov_n_s8(i8 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
+// CHECK: ret <8 x i8> [[VECINIT7_I]]
int8x8_t test_vmov_n_s8(int8_t a) {
return vmov_n_s8(a);
}
-// CHECK-LABEL: test_vmov_n_s16
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmov_n_s16(i16 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
+// CHECK: ret <4 x i16> [[VECINIT3_I]]
int16x4_t test_vmov_n_s16(int16_t a) {
return vmov_n_s16(a);
}
-// CHECK-LABEL: test_vmov_n_s32
-// CHECK: mov {{r[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmov_n_s32(i32 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
+// CHECK: ret <2 x i32> [[VECINIT1_I]]
int32x2_t test_vmov_n_s32(int32_t a) {
return vmov_n_s32(a);
}
-// CHECK-LABEL: test_vmov_n_p8
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmov_n_p8(i8 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
+// CHECK: ret <8 x i8> [[VECINIT7_I]]
poly8x8_t test_vmov_n_p8(poly8_t a) {
return vmov_n_p8(a);
}
-// CHECK-LABEL: test_vmov_n_p16
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmov_n_p16(i16 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
+// CHECK: ret <4 x i16> [[VECINIT3_I]]
poly16x4_t test_vmov_n_p16(poly16_t a) {
return vmov_n_p16(a);
}
-// CHECK-LABEL: test_vmov_n_f16
-// CHECK: vld1.16 {{{d[0-9]+\[\]}}}
+// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(half* %a) #0 {
+// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
+// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
+// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
+// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT]]1, half [[TMP0]], i32 2
+// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT]]2, half [[TMP0]], i32 3
+// CHECK: ret <4 x half> [[VECINIT]]3
float16x4_t test_vmov_n_f16(float16_t *a) {
return vmov_n_f16(*a);
}
-// CHECK-LABEL: test_vmov_n_f32
-// CHECK: mov {{r[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vmov_n_f32(float %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
+// CHECK: ret <2 x float> [[VECINIT1_I]]
float32x2_t test_vmov_n_f32(float32_t a) {
return vmov_n_f32(a);
}
-// CHECK-LABEL: test_vmovq_n_u8
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmovq_n_u8(i8 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
+// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
+// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
+// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
+// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
+// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
+// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
+// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
+// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
+// CHECK: ret <16 x i8> [[VECINIT15_I]]
uint8x16_t test_vmovq_n_u8(uint8_t a) {
return vmovq_n_u8(a);
}
-// CHECK-LABEL: test_vmovq_n_u16
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmovq_n_u16(i16 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
+// CHECK: ret <8 x i16> [[VECINIT7_I]]
uint16x8_t test_vmovq_n_u16(uint16_t a) {
return vmovq_n_u16(a);
}
-// CHECK-LABEL: test_vmovq_n_u32
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmovq_n_u32(i32 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
+// CHECK: ret <4 x i32> [[VECINIT3_I]]
uint32x4_t test_vmovq_n_u32(uint32_t a) {
return vmovq_n_u32(a);
}
-// CHECK-LABEL: test_vmovq_n_s8
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmovq_n_s8(i8 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
+// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
+// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
+// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
+// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
+// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
+// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
+// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
+// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
+// CHECK: ret <16 x i8> [[VECINIT15_I]]
int8x16_t test_vmovq_n_s8(int8_t a) {
return vmovq_n_s8(a);
}
-// CHECK-LABEL: test_vmovq_n_s16
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmovq_n_s16(i16 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
+// CHECK: ret <8 x i16> [[VECINIT7_I]]
int16x8_t test_vmovq_n_s16(int16_t a) {
return vmovq_n_s16(a);
}
-// CHECK-LABEL: test_vmovq_n_s32
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmovq_n_s32(i32 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
+// CHECK: ret <4 x i32> [[VECINIT3_I]]
int32x4_t test_vmovq_n_s32(int32_t a) {
return vmovq_n_s32(a);
}
-// CHECK-LABEL: test_vmovq_n_p8
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmovq_n_p8(i8 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
+// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
+// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
+// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
+// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
+// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
+// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
+// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
+// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
+// CHECK: ret <16 x i8> [[VECINIT15_I]]
poly8x16_t test_vmovq_n_p8(poly8_t a) {
return vmovq_n_p8(a);
}
-// CHECK-LABEL: test_vmovq_n_p16
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmovq_n_p16(i16 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
+// CHECK: ret <8 x i16> [[VECINIT7_I]]
poly16x8_t test_vmovq_n_p16(poly16_t a) {
return vmovq_n_p16(a);
}
-// CHECK-LABEL: test_vmovq_n_f16
-// CHECK: vld1.16 {{{d[0-9]+\[\], d[0-9]+\[\]}}}
+// CHECK-LABEL: define <8 x half> @test_vmovq_n_f16(half* %a) #0 {
+// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
+// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
+// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
+// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT]]1, half [[TMP0]], i32 2
+// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT]]2, half [[TMP0]], i32 3
+// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT]]3, half [[TMP0]], i32 4
+// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT]]4, half [[TMP0]], i32 5
+// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT]]5, half [[TMP0]], i32 6
+// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT]]6, half [[TMP0]], i32 7
+// CHECK: ret <8 x half> [[VECINIT]]7
float16x8_t test_vmovq_n_f16(float16_t *a) {
return vmovq_n_f16(*a);
}
-// CHECK-LABEL: test_vmovq_n_f32
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vmovq_n_f32(float %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
+// CHECK: ret <4 x float> [[VECINIT3_I]]
float32x4_t test_vmovq_n_f32(float32_t a) {
return vmovq_n_f32(a);
}
-// CHECK-LABEL: test_vmov_n_s64
-// CHECK: vmov.32 [[REG:d[0-9]+]][0], r0
-// CHECK: vmov.32 [[REG]][1], r1
+// CHECK-LABEL: define <1 x i64> @test_vmov_n_s64(i64 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
+// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
+// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vmov_n_s64(int64_t a) {
int64x1_t tmp = vmov_n_s64(a);
return vadd_s64(tmp, tmp);
}
-// CHECK-LABEL: test_vmov_n_u64
-// CHECK: vmov.32 [[REG:d[0-9]+]][0], r0
-// CHECK: vmov.32 [[REG]][1], r1
+// CHECK-LABEL: define <1 x i64> @test_vmov_n_u64(i64 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
+// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
+// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vmov_n_u64(uint64_t a) {
uint64x1_t tmp = vmov_n_u64(a);
return vadd_u64(tmp, tmp);
}
-// CHECK-LABEL: test_vmovq_n_s64
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmovq_n_s64(i64 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
+// CHECK: ret <2 x i64> [[VECINIT1_I]]
int64x2_t test_vmovq_n_s64(int64_t a) {
return vmovq_n_s64(a);
}
-// CHECK-LABEL: test_vmovq_n_u64
-// CHECK: vmov {{r[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmovq_n_u64(i64 %a) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
+// CHECK: ret <2 x i64> [[VECINIT1_I]]
uint64x2_t test_vmovq_n_u64(uint64_t a) {
return vmovq_n_u64(a);
}
-// CHECK-LABEL: test_vmul_s8
-// CHECK: vmul.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmul_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b
+// CHECK: ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t a, int8x8_t b) {
return vmul_s8(a, b);
}
-// CHECK-LABEL: test_vmul_s16
-// CHECK: vmul.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmul_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b
+// CHECK: ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t a, int16x4_t b) {
return vmul_s16(a, b);
}
-// CHECK-LABEL: test_vmul_s32
-// CHECK: vmul.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmul_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b
+// CHECK: ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t a, int32x2_t b) {
return vmul_s32(a, b);
}
-// CHECK-LABEL: test_vmul_f32
-// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vmul_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, %b
+// CHECK: ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t a, float32x2_t b) {
return vmul_f32(a, b);
}
-// CHECK-LABEL: test_vmul_u8
-// CHECK: vmul.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmul_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b
+// CHECK: ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t a, uint8x8_t b) {
return vmul_u8(a, b);
}
-// CHECK-LABEL: test_vmul_u16
-// CHECK: vmul.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmul_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b
+// CHECK: ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t a, uint16x4_t b) {
return vmul_u16(a, b);
}
-// CHECK-LABEL: test_vmul_u32
-// CHECK: vmul.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmul_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b
+// CHECK: ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t a, uint32x2_t b) {
return vmul_u32(a, b);
}
-// CHECK-LABEL: test_vmulq_s8
-// CHECK: vmul.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmulq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b
+// CHECK: ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t a, int8x16_t b) {
return vmulq_s8(a, b);
}
-// CHECK-LABEL: test_vmulq_s16
-// CHECK: vmul.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmulq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b
+// CHECK: ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t a, int16x8_t b) {
return vmulq_s16(a, b);
}
-// CHECK-LABEL: test_vmulq_s32
-// CHECK: vmul.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmulq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b
+// CHECK: ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t a, int32x4_t b) {
return vmulq_s32(a, b);
}
-// CHECK-LABEL: test_vmulq_f32
-// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vmulq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, %b
+// CHECK: ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) {
return vmulq_f32(a, b);
}
-// CHECK-LABEL: test_vmulq_u8
-// CHECK: vmul.i8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmulq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b
+// CHECK: ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t a, uint8x16_t b) {
return vmulq_u8(a, b);
}
-// CHECK-LABEL: test_vmulq_u16
-// CHECK: vmul.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmulq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b
+// CHECK: ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t a, uint16x8_t b) {
return vmulq_u16(a, b);
}
-// CHECK-LABEL: test_vmulq_u32
-// CHECK: vmul.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmulq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b
+// CHECK: ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) {
return vmulq_u32(a, b);
}
-// CHECK-LABEL: test_vmull_s8
-// CHECK: vmull.s8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i16> [[VMULL_I]]
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
return vmull_s8(a, b);
}
-// CHECK-LABEL: test_vmull_s16
-// CHECK: vmull.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
+// CHECK: ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
return vmull_s16(a, b);
}
-// CHECK-LABEL: test_vmull_s32
-// CHECK: vmull.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
+// CHECK: ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
return vmull_s32(a, b);
}
-// CHECK-LABEL: test_vmull_u8
-// CHECK: vmull.u8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i16> [[VMULL_I]]
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
return vmull_u8(a, b);
}
-// CHECK-LABEL: test_vmull_u16
-// CHECK: vmull.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
+// CHECK: ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
return vmull_u16(a, b);
}
-// CHECK-LABEL: test_vmull_u32
-// CHECK: vmull.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
+// CHECK: ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
return vmull_u32(a, b);
}
-// CHECK-LABEL: test_vmull_p8
-// CHECK: vmull.p8 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
return vmull_p8(a, b);
}
-// CHECK-LABEL: test_vmull_lane_s16
-// CHECK: vmull.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
+// CHECK: ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t b) {
return vmull_lane_s16(a, b, 3);
}
-// CHECK-LABEL: test_vmull_lane_s32
-// CHECK: vmull.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
+// CHECK: ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t b) {
return vmull_lane_s32(a, b, 1);
}
-// CHECK-LABEL: test_vmull_lane_u16
-// CHECK: vmull.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
+// CHECK: ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t b) {
return vmull_lane_u16(a, b, 3);
}
-// CHECK-LABEL: test_vmull_lane_u32
-// CHECK: vmull.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
+// CHECK: ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t b) {
return vmull_lane_u32(a, b, 1);
}
-// CHECK-LABEL: test_vmull_n_s16
-// CHECK: vmull.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmull_n_s16(<4 x i16> %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #4
+// CHECK: ret <4 x i32> [[VMULL5_I]]
int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) {
return vmull_n_s16(a, b);
}
-// CHECK-LABEL: test_vmull_n_s32
-// CHECK: vmull.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmull_n_s32(<2 x i32> %a, i32 %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #4
+// CHECK: ret <2 x i64> [[VMULL3_I]]
int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) {
return vmull_n_s32(a, b);
}
-// CHECK-LABEL: test_vmull_n_u16
-// CHECK: vmull.u16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmull_n_u16(<4 x i16> %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #4
+// CHECK: ret <4 x i32> [[VMULL5_I]]
uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) {
return vmull_n_u16(a, b);
}
-// CHECK-LABEL: test_vmull_n_u32
-// CHECK: vmull.u32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vmull_n_u32(<2 x i32> %a, i32 %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #4
+// CHECK: ret <2 x i64> [[VMULL3_I]]
uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) {
return vmull_n_u32(a, b);
}
-// CHECK-LABEL: test_vmul_p8
-// CHECK: vmul.p8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmul_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t a, poly8x8_t b) {
return vmul_p8(a, b);
}
-// CHECK-LABEL: test_vmulq_p8
-// CHECK: vmul.p8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmulq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t a, poly8x16_t b) {
return vmulq_p8(a, b);
}
-// CHECK-LABEL: test_vmul_lane_s16
-// CHECK: vmul.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
+// CHECK: ret <4 x i16> [[MUL]]
int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t b) {
return vmul_lane_s16(a, b, 3);
}
-// CHECK-LABEL: test_vmul_lane_s32
-// CHECK: vmul.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
+// CHECK: ret <2 x i32> [[MUL]]
int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t b) {
return vmul_lane_s32(a, b, 1);
}
-// CHECK-LABEL: test_vmul_lane_f32
-// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %b, <2 x float> %b, <2 x i32> <i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]]
+// CHECK: ret <2 x float> [[MUL]]
float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t b) {
return vmul_lane_f32(a, b, 1);
}
-// CHECK-LABEL: test_vmul_lane_u16
-// CHECK: vmul.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
+// CHECK: ret <4 x i16> [[MUL]]
uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t b) {
return vmul_lane_u16(a, b, 3);
}
-// CHECK-LABEL: test_vmul_lane_u32
-// CHECK: vmul.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
+// CHECK: ret <2 x i32> [[MUL]]
uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t b) {
return vmul_lane_u32(a, b, 1);
}
-// CHECK-LABEL: test_vmulq_lane_s16
-// CHECK: vmul.i16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
+// CHECK: ret <8 x i16> [[MUL]]
int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t b) {
return vmulq_lane_s16(a, b, 3);
}
-// CHECK-LABEL: test_vmulq_lane_s32
-// CHECK: vmul.i32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
+// CHECK: ret <4 x i32> [[MUL]]
int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t b) {
return vmulq_lane_s32(a, b, 1);
}
-// CHECK-LABEL: test_vmulq_lane_f32
-// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %b, <2 x float> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]]
+// CHECK: ret <4 x float> [[MUL]]
float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t b) {
return vmulq_lane_f32(a, b, 1);
}
-// CHECK-LABEL: test_vmulq_lane_u16
-// CHECK: vmul.i16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
+// CHECK: ret <8 x i16> [[MUL]]
uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t b) {
return vmulq_lane_u16(a, b, 3);
}
-// CHECK-LABEL: test_vmulq_lane_u32
-// CHECK: vmul.i32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
+// CHECK: ret <4 x i32> [[MUL]]
uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t b) {
return vmulq_lane_u32(a, b, 1);
}
-// CHECK-LABEL: test_vmul_n_s16
-// CHECK: vmul.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmul_n_s16(<4 x i16> %a, i16 %b) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
+// CHECK: ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) {
return vmul_n_s16(a, b);
}
-// CHECK-LABEL: test_vmul_n_s32
-// CHECK: vmul.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmul_n_s32(<2 x i32> %a, i32 %b) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
+// CHECK: ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) {
return vmul_n_s32(a, b);
}
-// CHECK-LABEL: test_vmul_n_f32
-// CHECK: vmul.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1
+// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]]
+// CHECK: ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) {
return vmul_n_f32(a, b);
}
-// CHECK-LABEL: test_vmul_n_u16
-// CHECK: vmul.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmul_n_u16(<4 x i16> %a, i16 %b) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
+// CHECK: ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) {
return vmul_n_u16(a, b);
}
-// CHECK-LABEL: test_vmul_n_u32
-// CHECK: vmul.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmul_n_u32(<2 x i32> %a, i32 %b) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
+// CHECK: ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) {
return vmul_n_u32(a, b);
}
-// CHECK-LABEL: test_vmulq_n_s16
-// CHECK: vmul.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmulq_n_s16(<8 x i16> %a, i16 %b) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
+// CHECK: ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) {
return vmulq_n_s16(a, b);
}
-// CHECK-LABEL: test_vmulq_n_s32
-// CHECK: vmul.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmulq_n_s32(<4 x i32> %a, i32 %b) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
+// CHECK: ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) {
return vmulq_n_s32(a, b);
}
-// CHECK-LABEL: test_vmulq_n_f32
-// CHECK: vmul.f32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[0]
+// CHECK-LABEL: define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %b, i32 3
+// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, [[VECINIT3_I]]
+// CHECK: ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) {
return vmulq_n_f32(a, b);
}
-// CHECK-LABEL: test_vmulq_n_u16
-// CHECK: vmul.i16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmulq_n_u16(<8 x i16> %a, i16 %b) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
+// CHECK: ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) {
return vmulq_n_u16(a, b);
}
-// CHECK-LABEL: test_vmulq_n_u32
-// CHECK: vmul.i32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmulq_n_u32(<4 x i32> %a, i32 %b) #0 {
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
+// CHECK: ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) {
return vmulq_n_u32(a, b);
}
-// CHECK-LABEL: test_vmvn_s8
-// CHECK: vmvn d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: ret <8 x i8> [[NEG_I]]
int8x8_t test_vmvn_s8(int8x8_t a) {
return vmvn_s8(a);
}
-// CHECK-LABEL: test_vmvn_s16
-// CHECK: vmvn d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: ret <4 x i16> [[NEG_I]]
int16x4_t test_vmvn_s16(int16x4_t a) {
return vmvn_s16(a);
}
-// CHECK-LABEL: test_vmvn_s32
-// CHECK: vmvn d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
+// CHECK: ret <2 x i32> [[NEG_I]]
int32x2_t test_vmvn_s32(int32x2_t a) {
return vmvn_s32(a);
}
-// CHECK-LABEL: test_vmvn_u8
-// CHECK: vmvn d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmvn_u8(<8 x i8> %a) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: ret <8 x i8> [[NEG_I]]
uint8x8_t test_vmvn_u8(uint8x8_t a) {
return vmvn_u8(a);
}
-// CHECK-LABEL: test_vmvn_u16
-// CHECK: vmvn d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vmvn_u16(<4 x i16> %a) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: ret <4 x i16> [[NEG_I]]
uint16x4_t test_vmvn_u16(uint16x4_t a) {
return vmvn_u16(a);
}
-// CHECK-LABEL: test_vmvn_u32
-// CHECK: vmvn d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vmvn_u32(<2 x i32> %a) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
+// CHECK: ret <2 x i32> [[NEG_I]]
uint32x2_t test_vmvn_u32(uint32x2_t a) {
return vmvn_u32(a);
}
-// CHECK-LABEL: test_vmvn_p8
-// CHECK: vmvn d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vmvn_p8(<8 x i8> %a) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: ret <8 x i8> [[NEG_I]]
poly8x8_t test_vmvn_p8(poly8x8_t a) {
return vmvn_p8(a);
}
-// CHECK-LABEL: test_vmvnq_s8
-// CHECK: vmvn q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: ret <16 x i8> [[NEG_I]]
int8x16_t test_vmvnq_s8(int8x16_t a) {
return vmvnq_s8(a);
}
-// CHECK-LABEL: test_vmvnq_s16
-// CHECK: vmvn q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: ret <8 x i16> [[NEG_I]]
int16x8_t test_vmvnq_s16(int16x8_t a) {
return vmvnq_s16(a);
}
-// CHECK-LABEL: test_vmvnq_s32
-// CHECK: vmvn q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: ret <4 x i32> [[NEG_I]]
int32x4_t test_vmvnq_s32(int32x4_t a) {
return vmvnq_s32(a);
}
-// CHECK-LABEL: test_vmvnq_u8
-// CHECK: vmvn q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmvnq_u8(<16 x i8> %a) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: ret <16 x i8> [[NEG_I]]
uint8x16_t test_vmvnq_u8(uint8x16_t a) {
return vmvnq_u8(a);
}
-// CHECK-LABEL: test_vmvnq_u16
-// CHECK: vmvn q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vmvnq_u16(<8 x i16> %a) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: ret <8 x i16> [[NEG_I]]
uint16x8_t test_vmvnq_u16(uint16x8_t a) {
return vmvnq_u16(a);
}
-// CHECK-LABEL: test_vmvnq_u32
-// CHECK: vmvn q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vmvnq_u32(<4 x i32> %a) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: ret <4 x i32> [[NEG_I]]
uint32x4_t test_vmvnq_u32(uint32x4_t a) {
return vmvnq_u32(a);
}
-// CHECK-LABEL: test_vmvnq_p8
-// CHECK: vmvn q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vmvnq_p8(<16 x i8> %a) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: ret <16 x i8> [[NEG_I]]
poly8x16_t test_vmvnq_p8(poly8x16_t a) {
return vmvnq_p8(a);
}
-// CHECK-LABEL: test_vneg_s8
-// CHECK: vneg.s8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a
+// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vneg_s8(int8x8_t a) {
return vneg_s8(a);
}
-// CHECK-LABEL: test_vneg_s16
-// CHECK: vneg.s16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a
+// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vneg_s16(int16x4_t a) {
return vneg_s16(a);
}
-// CHECK-LABEL: test_vneg_s32
-// CHECK: vneg.s32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a
+// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vneg_s32(int32x2_t a) {
return vneg_s32(a);
}
-// CHECK-LABEL: test_vneg_f32
-// CHECK: vneg.f32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vneg_f32(<2 x float> %a) #0 {
+// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
+// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vneg_f32(float32x2_t a) {
return vneg_f32(a);
}
-// CHECK-LABEL: test_vnegq_s8
-// CHECK: vneg.s8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a
+// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vnegq_s8(int8x16_t a) {
return vnegq_s8(a);
}
-// CHECK-LABEL: test_vnegq_s16
-// CHECK: vneg.s16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a
+// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vnegq_s16(int16x8_t a) {
return vnegq_s16(a);
}
-// CHECK-LABEL: test_vnegq_s32
-// CHECK: vneg.s32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a
+// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vnegq_s32(int32x4_t a) {
return vnegq_s32(a);
}
-// CHECK-LABEL: test_vnegq_f32
-// CHECK: vneg.f32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 {
+// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
+// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vnegq_f32(float32x4_t a) {
return vnegq_f32(a);
}
-// CHECK-LABEL: test_vorn_s8
-// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vorn_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
+// CHECK: ret <8 x i8> [[OR_I]]
int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) {
return vorn_s8(a, b);
}
-// CHECK-LABEL: test_vorn_s16
-// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vorn_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
+// CHECK: ret <4 x i16> [[OR_I]]
int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) {
return vorn_s16(a, b);
}
-// CHECK-LABEL: test_vorn_s32
-// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vorn_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
+// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
+// CHECK: ret <2 x i32> [[OR_I]]
int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) {
return vorn_s32(a, b);
}
-// CHECK-LABEL: test_vorn_s64
-// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vorn_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
+// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
+// CHECK: ret <1 x i64> [[OR_I]]
int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) {
return vorn_s64(a, b);
}
-// CHECK-LABEL: test_vorn_u8
-// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vorn_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
+// CHECK: ret <8 x i8> [[OR_I]]
uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) {
return vorn_u8(a, b);
}
-// CHECK-LABEL: test_vorn_u16
-// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vorn_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
+// CHECK: ret <4 x i16> [[OR_I]]
uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) {
return vorn_u16(a, b);
}
-// CHECK-LABEL: test_vorn_u32
-// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vorn_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
+// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
+// CHECK: ret <2 x i32> [[OR_I]]
uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) {
return vorn_u32(a, b);
}
-// CHECK-LABEL: test_vorn_u64
-// CHECK: vorn d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vorn_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
+// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
+// CHECK: ret <1 x i64> [[OR_I]]
uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) {
return vorn_u64(a, b);
}
-// CHECK-LABEL: test_vornq_s8
-// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vornq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
+// CHECK: ret <16 x i8> [[OR_I]]
int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) {
return vornq_s8(a, b);
}
-// CHECK-LABEL: test_vornq_s16
-// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
+// CHECK: ret <8 x i16> [[OR_I]]
int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) {
return vornq_s16(a, b);
}
-// CHECK-LABEL: test_vornq_s32
-// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vornq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
+// CHECK: ret <4 x i32> [[OR_I]]
int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) {
return vornq_s32(a, b);
}
-// CHECK-LABEL: test_vornq_s64
-// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vornq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
+// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
+// CHECK: ret <2 x i64> [[OR_I]]
int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) {
return vornq_s64(a, b);
}
-// CHECK-LABEL: test_vornq_u8
-// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
+// CHECK: ret <16 x i8> [[OR_I]]
uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) {
return vornq_u8(a, b);
}
-// CHECK-LABEL: test_vornq_u16
-// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vornq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
+// CHECK: ret <8 x i16> [[OR_I]]
uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) {
return vornq_u16(a, b);
}
-// CHECK-LABEL: test_vornq_u32
-// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
+// CHECK: ret <4 x i32> [[OR_I]]
uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) {
return vornq_u32(a, b);
}
-// CHECK-LABEL: test_vornq_u64
-// CHECK: vorn q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vornq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
+// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
+// CHECK: ret <2 x i64> [[OR_I]]
uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) {
return vornq_u64(a, b);
}
-// CHECK-LABEL: test_vorr_s8
-// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vorr_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b
+// CHECK: ret <8 x i8> [[OR_I]]
int8x8_t test_vorr_s8(int8x8_t a, int8x8_t b) {
return vorr_s8(a, b);
}
-// CHECK-LABEL: test_vorr_s16
-// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vorr_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b
+// CHECK: ret <4 x i16> [[OR_I]]
int16x4_t test_vorr_s16(int16x4_t a, int16x4_t b) {
return vorr_s16(a, b);
}
-// CHECK-LABEL: test_vorr_s32
-// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vorr_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b
+// CHECK: ret <2 x i32> [[OR_I]]
int32x2_t test_vorr_s32(int32x2_t a, int32x2_t b) {
return vorr_s32(a, b);
}
-// CHECK-LABEL: test_vorr_s64
-// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vorr_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b
+// CHECK: ret <1 x i64> [[OR_I]]
int64x1_t test_vorr_s64(int64x1_t a, int64x1_t b) {
return vorr_s64(a, b);
}
-// CHECK-LABEL: test_vorr_u8
-// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vorr_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b
+// CHECK: ret <8 x i8> [[OR_I]]
uint8x8_t test_vorr_u8(uint8x8_t a, uint8x8_t b) {
return vorr_u8(a, b);
}
-// CHECK-LABEL: test_vorr_u16
-// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vorr_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b
+// CHECK: ret <4 x i16> [[OR_I]]
uint16x4_t test_vorr_u16(uint16x4_t a, uint16x4_t b) {
return vorr_u16(a, b);
}
-// CHECK-LABEL: test_vorr_u32
-// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vorr_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b
+// CHECK: ret <2 x i32> [[OR_I]]
uint32x2_t test_vorr_u32(uint32x2_t a, uint32x2_t b) {
return vorr_u32(a, b);
}
-// CHECK-LABEL: test_vorr_u64
-// CHECK: vorr d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vorr_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b
+// CHECK: ret <1 x i64> [[OR_I]]
uint64x1_t test_vorr_u64(uint64x1_t a, uint64x1_t b) {
return vorr_u64(a, b);
}
-// CHECK-LABEL: test_vorrq_s8
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vorrq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
+// CHECK: ret <16 x i8> [[OR_I]]
int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) {
return vorrq_s8(a, b);
}
-// CHECK-LABEL: test_vorrq_s16
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
+// CHECK: ret <8 x i16> [[OR_I]]
int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) {
return vorrq_s16(a, b);
}
-// CHECK-LABEL: test_vorrq_s32
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vorrq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
+// CHECK: ret <4 x i32> [[OR_I]]
int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) {
return vorrq_s32(a, b);
}
-// CHECK-LABEL: test_vorrq_s64
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vorrq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
+// CHECK: ret <2 x i64> [[OR_I]]
int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) {
return vorrq_s64(a, b);
}
-// CHECK-LABEL: test_vorrq_u8
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
+// CHECK: ret <16 x i8> [[OR_I]]
uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) {
return vorrq_u8(a, b);
}
-// CHECK-LABEL: test_vorrq_u16
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vorrq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
+// CHECK: ret <8 x i16> [[OR_I]]
uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) {
return vorrq_u16(a, b);
}
-// CHECK-LABEL: test_vorrq_u32
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
+// CHECK: ret <4 x i32> [[OR_I]]
uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) {
return vorrq_u32(a, b);
}
-// CHECK-LABEL: test_vorrq_u64
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vorrq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
+// CHECK: ret <2 x i64> [[OR_I]]
uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) {
return vorrq_u64(a, b);
}
-// CHECK-LABEL: test_vpadal_s8
-// CHECK: vpadal.s8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %b) #4
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = add <4 x i16> [[VPADAL_I]], [[TMP1]]
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) {
return vpadal_s8(a, b);
}
-// CHECK-LABEL: test_vpadal_s16
-// CHECK: vpadal.s16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[VPADAL_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP3:%.*]] = add <2 x i32> [[VPADAL1_I]], [[TMP2]]
+// CHECK: ret <2 x i32> [[TMP3]]
int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) {
return vpadal_s16(a, b);
}
-// CHECK-LABEL: test_vpadal_s32
-// CHECK: vpadal.s32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[VPADAL_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[TMP3:%.*]] = add <1 x i64> [[VPADAL1_I]], [[TMP2]]
+// CHECK: ret <1 x i64> [[TMP3]]
int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) {
return vpadal_s32(a, b);
}
-// CHECK-LABEL: test_vpadal_u8
-// CHECK: vpadal.u8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %b) #4
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = add <4 x i16> [[VPADAL_I]], [[TMP1]]
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) {
return vpadal_u8(a, b);
}
-// CHECK-LABEL: test_vpadal_u16
-// CHECK: vpadal.u16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[VPADAL_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP3:%.*]] = add <2 x i32> [[VPADAL1_I]], [[TMP2]]
+// CHECK: ret <2 x i32> [[TMP3]]
uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) {
return vpadal_u16(a, b);
}
-// CHECK-LABEL: test_vpadal_u32
-// CHECK: vpadal.u32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[VPADAL_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[TMP3:%.*]] = add <1 x i64> [[VPADAL1_I]], [[TMP2]]
+// CHECK: ret <1 x i64> [[TMP3]]
uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) {
return vpadal_u32(a, b);
}
-// CHECK-LABEL: test_vpadalq_s8
-// CHECK: vpadal.s8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %b) #4
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP2:%.*]] = add <8 x i16> [[VPADAL_I]], [[TMP1]]
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) {
return vpadalq_s8(a, b);
}
-// CHECK-LABEL: test_vpadalq_s16
-// CHECK: vpadal.s16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[VPADAL_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = add <4 x i32> [[VPADAL1_I]], [[TMP2]]
+// CHECK: ret <4 x i32> [[TMP3]]
int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) {
return vpadalq_s16(a, b);
}
-// CHECK-LABEL: test_vpadalq_s32
-// CHECK: vpadal.s32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[VPADAL_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = add <2 x i64> [[VPADAL1_I]], [[TMP2]]
+// CHECK: ret <2 x i64> [[TMP3]]
int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) {
return vpadalq_s32(a, b);
}
-// CHECK-LABEL: test_vpadalq_u8
-// CHECK: vpadal.u8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %b) #4
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP2:%.*]] = add <8 x i16> [[VPADAL_I]], [[TMP1]]
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) {
return vpadalq_u8(a, b);
}
-// CHECK-LABEL: test_vpadalq_u16
-// CHECK: vpadal.u16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[VPADAL_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = add <4 x i32> [[VPADAL1_I]], [[TMP2]]
+// CHECK: ret <4 x i32> [[TMP3]]
uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) {
return vpadalq_u16(a, b);
}
-// CHECK-LABEL: test_vpadalq_u32
-// CHECK: vpadal.u32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[VPADAL_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = add <2 x i64> [[VPADAL1_I]], [[TMP2]]
+// CHECK: ret <2 x i64> [[TMP3]]
uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) {
return vpadalq_u32(a, b);
}
-// CHECK-LABEL: test_vpadd_s8
-// CHECK: vpadd.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vpadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
return vpadd_s8(a, b);
}
-// CHECK-LABEL: test_vpadd_s16
-// CHECK: vpadd.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vpadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
+// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
return vpadd_s16(a, b);
}
-// CHECK-LABEL: test_vpadd_s32
-// CHECK: vpadd.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vpadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
+// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
return vpadd_s32(a, b);
}
-// CHECK-LABEL: test_vpadd_u8
-// CHECK: vpadd.i8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vpadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
return vpadd_u8(a, b);
}
-// CHECK-LABEL: test_vpadd_u16
-// CHECK: vpadd.i16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vpadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
+// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
return vpadd_u16(a, b);
}
-// CHECK-LABEL: test_vpadd_u32
-// CHECK: vpadd.i32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vpadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
+// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
return vpadd_u32(a, b);
}
-// CHECK-LABEL: test_vpadd_f32
-// CHECK: vpadd.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vpadd_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> [[VPADD_V_I]], <2 x float> [[VPADD_V1_I]]) #4
+// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x float>
+// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
return vpadd_f32(a, b);
}
-// CHECK-LABEL: test_vpaddl_s8
-// CHECK: vpaddl.s8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 {
+// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %a) #4
+// CHECK: ret <4 x i16> [[VPADDL_I]]
int16x4_t test_vpaddl_s8(int8x8_t a) {
return vpaddl_s8(a);
}
-// CHECK-LABEL: test_vpaddl_s16
-// CHECK: vpaddl.s16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #4
+// CHECK: ret <2 x i32> [[VPADDL1_I]]
int32x2_t test_vpaddl_s16(int16x4_t a) {
return vpaddl_s16(a);
}
-// CHECK-LABEL: test_vpaddl_s32
-// CHECK: vpaddl.s32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #4
+// CHECK: ret <1 x i64> [[VPADDL1_I]]
int64x1_t test_vpaddl_s32(int32x2_t a) {
return vpaddl_s32(a);
}
-// CHECK-LABEL: test_vpaddl_u8
-// CHECK: vpaddl.u8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 {
+// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %a) #4
+// CHECK: ret <4 x i16> [[VPADDL_I]]
uint16x4_t test_vpaddl_u8(uint8x8_t a) {
return vpaddl_u8(a);
}
-// CHECK-LABEL: test_vpaddl_u16
-// CHECK: vpaddl.u16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #4
+// CHECK: ret <2 x i32> [[VPADDL1_I]]
uint32x2_t test_vpaddl_u16(uint16x4_t a) {
return vpaddl_u16(a);
}
-// CHECK-LABEL: test_vpaddl_u32
-// CHECK: vpaddl.u32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #4
+// CHECK: ret <1 x i64> [[VPADDL1_I]]
uint64x1_t test_vpaddl_u32(uint32x2_t a) {
return vpaddl_u32(a);
}
-// CHECK-LABEL: test_vpaddlq_s8
-// CHECK: vpaddl.s8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 {
+// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %a) #4
+// CHECK: ret <8 x i16> [[VPADDL_I]]
int16x8_t test_vpaddlq_s8(int8x16_t a) {
return vpaddlq_s8(a);
}
-// CHECK-LABEL: test_vpaddlq_s16
-// CHECK: vpaddl.s16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #4
+// CHECK: ret <4 x i32> [[VPADDL1_I]]
int32x4_t test_vpaddlq_s16(int16x8_t a) {
return vpaddlq_s16(a);
}
-// CHECK-LABEL: test_vpaddlq_s32
-// CHECK: vpaddl.s32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #4
+// CHECK: ret <2 x i64> [[VPADDL1_I]]
int64x2_t test_vpaddlq_s32(int32x4_t a) {
return vpaddlq_s32(a);
}
-// CHECK-LABEL: test_vpaddlq_u8
-// CHECK: vpaddl.u8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 {
+// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %a) #4
+// CHECK: ret <8 x i16> [[VPADDL_I]]
uint16x8_t test_vpaddlq_u8(uint8x16_t a) {
return vpaddlq_u8(a);
}
-// CHECK-LABEL: test_vpaddlq_u16
-// CHECK: vpaddl.u16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #4
+// CHECK: ret <4 x i32> [[VPADDL1_I]]
uint32x4_t test_vpaddlq_u16(uint16x8_t a) {
return vpaddlq_u16(a);
}
-// CHECK-LABEL: test_vpaddlq_u32
-// CHECK: vpaddl.u32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #4
+// CHECK: ret <2 x i64> [[VPADDL1_I]]
uint64x2_t test_vpaddlq_u32(uint32x4_t a) {
return vpaddlq_u32(a);
}
-// CHECK-LABEL: test_vpmax_s8
-// CHECK: vpmax.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vpmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VPMAX_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
return vpmax_s8(a, b);
}
-// CHECK-LABEL: test_vpmax_s16
-// CHECK: vpmax.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vpmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4
+// CHECK: ret <4 x i16> [[VPMAX2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
return vpmax_s16(a, b);
}
-// CHECK-LABEL: test_vpmax_s32
-// CHECK: vpmax.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vpmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4
+// CHECK: ret <2 x i32> [[VPMAX2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
return vpmax_s32(a, b);
}
-// CHECK-LABEL: test_vpmax_u8
-// CHECK: vpmax.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vpmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VPMAX_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
return vpmax_u8(a, b);
}
-// CHECK-LABEL: test_vpmax_u16
-// CHECK: vpmax.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vpmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4
+// CHECK: ret <4 x i16> [[VPMAX2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
return vpmax_u16(a, b);
}
-// CHECK-LABEL: test_vpmax_u32
-// CHECK: vpmax.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vpmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4
+// CHECK: ret <2 x i32> [[VPMAX2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
return vpmax_u32(a, b);
}
-// CHECK-LABEL: test_vpmax_f32
-// CHECK: vpmax.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vpmax_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[VPMAX_I]], <2 x float> [[VPMAX1_I]]) #4
+// CHECK: ret <2 x float> [[VPMAX2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
return vpmax_f32(a, b);
}
-// CHECK-LABEL: test_vpmin_s8
-// CHECK: vpmin.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vpmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VPMIN_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
return vpmin_s8(a, b);
}
-// CHECK-LABEL: test_vpmin_s16
-// CHECK: vpmin.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vpmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4
+// CHECK: ret <4 x i16> [[VPMIN2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
return vpmin_s16(a, b);
}
-// CHECK-LABEL: test_vpmin_s32
-// CHECK: vpmin.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vpmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4
+// CHECK: ret <2 x i32> [[VPMIN2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
return vpmin_s32(a, b);
}
-// CHECK-LABEL: test_vpmin_u8
-// CHECK: vpmin.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vpmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VPMIN_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
return vpmin_u8(a, b);
}
-// CHECK-LABEL: test_vpmin_u16
-// CHECK: vpmin.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vpmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4
+// CHECK: ret <4 x i16> [[VPMIN2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
return vpmin_u16(a, b);
}
-// CHECK-LABEL: test_vpmin_u32
-// CHECK: vpmin.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vpmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4
+// CHECK: ret <2 x i32> [[VPMIN2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
return vpmin_u32(a, b);
}
-// CHECK-LABEL: test_vpmin_f32
-// CHECK: vpmin.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vpmin_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> [[VPMIN_I]], <2 x float> [[VPMIN1_I]]) #4
+// CHECK: ret <2 x float> [[VPMIN2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
return vpmin_f32(a, b);
}
-// CHECK-LABEL: test_vqabs_s8
-// CHECK: vqabs.s8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 {
+// CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %a) #4
+// CHECK: ret <8 x i8> [[VQABS_V_I]]
int8x8_t test_vqabs_s8(int8x8_t a) {
return vqabs_s8(a);
}
-// CHECK-LABEL: test_vqabs_s16
-// CHECK: vqabs.s16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[VQABS_V_I]]) #4
+// CHECK: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP1]]
int16x4_t test_vqabs_s16(int16x4_t a) {
return vqabs_s16(a);
}
-// CHECK-LABEL: test_vqabs_s32
-// CHECK: vqabs.s32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> [[VQABS_V_I]]) #4
+// CHECK: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vqabs_s32(int32x2_t a) {
return vqabs_s32(a);
}
-// CHECK-LABEL: test_vqabsq_s8
-// CHECK: vqabs.s8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 {
+// CHECK: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %a) #4
+// CHECK: ret <16 x i8> [[VQABSQ_V_I]]
int8x16_t test_vqabsq_s8(int8x16_t a) {
return vqabsq_s8(a);
}
-// CHECK-LABEL: test_vqabsq_s16
-// CHECK: vqabs.s16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> [[VQABSQ_V_I]]) #4
+// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP1]]
int16x8_t test_vqabsq_s16(int16x8_t a) {
return vqabsq_s16(a);
}
-// CHECK-LABEL: test_vqabsq_s32
-// CHECK: vqabs.s32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> [[VQABSQ_V_I]]) #4
+// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vqabsq_s32(int32x4_t a) {
return vqabsq_s32(a);
}
-// CHECK-LABEL: test_vqadd_s8
-// CHECK: vqadd.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
return vqadd_s8(a, b);
}
-// CHECK-LABEL: test_vqadd_s16
-// CHECK: vqadd.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
return vqadd_s16(a, b);
}
-// CHECK-LABEL: test_vqadd_s32
-// CHECK: vqadd.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
return vqadd_s32(a, b);
}
-// CHECK-LABEL: test_vqadd_s64
-// CHECK: vqadd.s64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
return vqadd_s64(a, b);
}
-// CHECK-LABEL: test_vqadd_u8
-// CHECK: vqadd.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
return vqadd_u8(a, b);
}
-// CHECK-LABEL: test_vqadd_u16
-// CHECK: vqadd.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
return vqadd_u16(a, b);
}
-// CHECK-LABEL: test_vqadd_u32
-// CHECK: vqadd.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
return vqadd_u32(a, b);
}
-// CHECK-LABEL: test_vqadd_u64
-// CHECK: vqadd.u64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
return vqadd_u64(a, b);
}
-// CHECK-LABEL: test_vqaddq_s8
-// CHECK: vqadd.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
return vqaddq_s8(a, b);
}
-// CHECK-LABEL: test_vqaddq_s16
-// CHECK: vqadd.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
return vqaddq_s16(a, b);
}
-// CHECK-LABEL: test_vqaddq_s32
-// CHECK: vqadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
return vqaddq_s32(a, b);
}
-// CHECK-LABEL: test_vqaddq_s64
-// CHECK: vqadd.s64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
return vqaddq_s64(a, b);
}
-// CHECK-LABEL: test_vqaddq_u8
-// CHECK: vqadd.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
return vqaddq_u8(a, b);
}
-// CHECK-LABEL: test_vqaddq_u16
-// CHECK: vqadd.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
return vqaddq_u16(a, b);
}
-// CHECK-LABEL: test_vqaddq_u32
-// CHECK: vqadd.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
return vqaddq_u32(a, b);
}
-// CHECK-LABEL: test_vqaddq_u64
-// CHECK: vqadd.u64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
return vqaddq_u64(a, b);
}
-// CHECK-LABEL: test_vqdmlal_s16
-// CHECK: vqdmlal.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
+// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
+// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vqdmlal_s16(a, b, c);
}
-// CHECK-LABEL: test_vqdmlal_s32
-// CHECK: vqdmlal.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
+// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
+// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vqdmlal_s32(a, b, c);
}
-// CHECK-LABEL: test_vqdmlal_lane_s16
-// CHECK: vqdmlal.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
+// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
+// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vqdmlal_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: test_vqdmlal_lane_s32
-// CHECK: vqdmlal.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
+// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
+// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vqdmlal_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: test_vqdmlal_n_s16
-// CHECK: vqdmlal.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqdmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #4
+// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #4
+// CHECK: ret <4 x i32> [[VQDMLAL_V6_I]]
int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
return vqdmlal_n_s16(a, b, c);
}
-// CHECK-LABEL: test_vqdmlal_n_s32
-// CHECK: vqdmlal.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqdmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #4
+// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #4
+// CHECK: ret <2 x i64> [[VQDMLAL_V4_I]]
int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
return vqdmlal_n_s32(a, b, c);
}
-// CHECK-LABEL: test_vqdmlsl_s16
-// CHECK: vqdmlsl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
+// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
+// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vqdmlsl_s16(a, b, c);
}
-// CHECK-LABEL: test_vqdmlsl_s32
-// CHECK: vqdmlsl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
+// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
+// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vqdmlsl_s32(a, b, c);
}
-// CHECK-LABEL: test_vqdmlsl_lane_s16
-// CHECK: vqdmlsl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
+// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
+// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vqdmlsl_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: test_vqdmlsl_lane_s32
-// CHECK: vqdmlsl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
+// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
+// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vqdmlsl_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: test_vqdmlsl_n_s16
-// CHECK: vqdmlsl.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #4
+// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #4
+// CHECK: ret <4 x i32> [[VQDMLSL_V6_I]]
int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
return vqdmlsl_n_s16(a, b, c);
}
-// CHECK-LABEL: test_vqdmlsl_n_s32
-// CHECK: vqdmlsl.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #4
+// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #4
+// CHECK: ret <2 x i64> [[VQDMLSL_V4_I]]
int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
return vqdmlsl_n_s32(a, b, c);
}
-// CHECK-LABEL: test_vqdmulh_s16
-// CHECK: vqdmulh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4
+// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
return vqdmulh_s16(a, b);
}
-// CHECK-LABEL: test_vqdmulh_s32
-// CHECK: vqdmulh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4
+// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
return vqdmulh_s32(a, b);
}
-// CHECK-LABEL: test_vqdmulhq_s16
-// CHECK: vqdmulh.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4
+// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
return vqdmulhq_s16(a, b);
}
-// CHECK-LABEL: test_vqdmulhq_s32
-// CHECK: vqdmulh.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4
+// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
return vqdmulhq_s32(a, b);
}
-// CHECK-LABEL: test_vqdmulh_lane_s16
-// CHECK: vqdmulh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4
+// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t b) {
return vqdmulh_lane_s16(a, b, 3);
}
-// CHECK-LABEL: test_vqdmulh_lane_s32
-// CHECK: vqdmulh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4
+// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t b) {
return vqdmulh_lane_s32(a, b, 1);
}
-// CHECK-LABEL: test_vqdmulhq_lane_s16
-// CHECK: vqdmulh.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
+// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4
+// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
return vqdmulhq_lane_s16(a, b, 3);
}
-// CHECK-LABEL: test_vqdmulhq_lane_s32
-// CHECK: vqdmulh.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
+// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4
+// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
return vqdmulhq_lane_s32(a, b, 1);
}
-// CHECK-LABEL: test_vqdmulh_n_s16
-// CHECK: vqdmulh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqdmulh_n_s16(<4 x i16> %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V4_I]]) #4
+// CHECK: [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V6_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) {
return vqdmulh_n_s16(a, b);
}
-// CHECK-LABEL: test_vqdmulh_n_s32
-// CHECK: vqdmulh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqdmulh_n_s32(<2 x i32> %a, i32 %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V2_I]]) #4
+// CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V4_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) {
return vqdmulh_n_s32(a, b);
}
-// CHECK-LABEL: test_vqdmulhq_n_s16
-// CHECK: vqdmulh.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_n_s16(<8 x i16> %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
+// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V8_I]]) #4
+// CHECK: [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V10_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) {
return vqdmulhq_n_s16(a, b);
}
-// CHECK-LABEL: test_vqdmulhq_n_s32
-// CHECK: vqdmulh.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
+// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V4_I]]) #4
+// CHECK: [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V6_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) {
return vqdmulhq_n_s32(a, b);
}
-// CHECK-LABEL: test_vqdmull_s16
-// CHECK: vqdmull.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4
+// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
return vqdmull_s16(a, b);
}
-// CHECK-LABEL: test_vqdmull_s32
-// CHECK: vqdmull.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4
+// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
return vqdmull_s32(a, b);
}
-// CHECK-LABEL: test_vqdmull_lane_s16
-// CHECK: vqdmull.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4
+// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t b) {
return vqdmull_lane_s16(a, b, 3);
}
-// CHECK-LABEL: test_vqdmull_lane_s32
-// CHECK: vqdmull.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4
+// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t b) {
return vqdmull_lane_s32(a, b, 1);
}
-// CHECK-LABEL: test_vqdmull_n_s16
-// CHECK: vqdmull.s16 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqdmull_n_s16(<4 x i16> %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V4_I]]) #4
+// CHECK: [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V6_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) {
return vqdmull_n_s16(a, b);
}
-// CHECK-LABEL: test_vqdmull_n_s32
-// CHECK: vqdmull.s32 q{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqdmull_n_s32(<2 x i32> %a, i32 %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQDMULL_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V2_I]]) #4
+// CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V4_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) {
return vqdmull_n_s32(a, b);
}
-// CHECK-LABEL: test_vqmovn_s16
-// CHECK: vqmovn.s16 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[VQMOVN_V_I]]) #4
+// CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
int8x8_t test_vqmovn_s16(int16x8_t a) {
return vqmovn_s16(a);
}
-// CHECK-LABEL: test_vqmovn_s32
-// CHECK: vqmovn.s32 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[VQMOVN_V_I]]) #4
+// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP1]]
int16x4_t test_vqmovn_s32(int32x4_t a) {
return vqmovn_s32(a);
}
-// CHECK-LABEL: test_vqmovn_s64
-// CHECK: vqmovn.s64 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[VQMOVN_V_I]]) #4
+// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vqmovn_s64(int64x2_t a) {
return vqmovn_s64(a);
}
-// CHECK-LABEL: test_vqmovn_u16
-// CHECK: vqmovn.u16 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[VQMOVN_V_I]]) #4
+// CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
uint8x8_t test_vqmovn_u16(uint16x8_t a) {
return vqmovn_u16(a);
}
-// CHECK-LABEL: test_vqmovn_u32
-// CHECK: vqmovn.u32 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[VQMOVN_V_I]]) #4
+// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP1]]
uint16x4_t test_vqmovn_u32(uint32x4_t a) {
return vqmovn_u32(a);
}
-// CHECK-LABEL: test_vqmovn_u64
-// CHECK: vqmovn.u64 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[VQMOVN_V_I]]) #4
+// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP1]]
uint32x2_t test_vqmovn_u64(uint64x2_t a) {
return vqmovn_u64(a);
}
-// CHECK-LABEL: test_vqmovun_s16
-// CHECK: vqmovun.s16 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[VQMOVUN_V_I]]) #4
+// CHECK: ret <8 x i8> [[VQMOVUN_V1_I]]
uint8x8_t test_vqmovun_s16(int16x8_t a) {
return vqmovun_s16(a);
}
-// CHECK-LABEL: test_vqmovun_s32
-// CHECK: vqmovun.s32 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[VQMOVUN_V_I]]) #4
+// CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP1]]
uint16x4_t test_vqmovun_s32(int32x4_t a) {
return vqmovun_s32(a);
}
-// CHECK-LABEL: test_vqmovun_s64
-// CHECK: vqmovun.s64 d{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[VQMOVUN_V_I]]) #4
+// CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP1]]
uint32x2_t test_vqmovun_s64(int64x2_t a) {
return vqmovun_s64(a);
}
-// CHECK-LABEL: test_vqneg_s8
-// CHECK: vqneg.s8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 {
+// CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %a) #4
+// CHECK: ret <8 x i8> [[VQNEG_V_I]]
int8x8_t test_vqneg_s8(int8x8_t a) {
return vqneg_s8(a);
}
-// CHECK-LABEL: test_vqneg_s16
-// CHECK: vqneg.s16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[VQNEG_V_I]]) #4
+// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP1]]
int16x4_t test_vqneg_s16(int16x4_t a) {
return vqneg_s16(a);
}
-// CHECK-LABEL: test_vqneg_s32
-// CHECK: vqneg.s32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> [[VQNEG_V_I]]) #4
+// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vqneg_s32(int32x2_t a) {
return vqneg_s32(a);
}
-// CHECK-LABEL: test_vqnegq_s8
-// CHECK: vqneg.s8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 {
+// CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %a) #4
+// CHECK: ret <16 x i8> [[VQNEGQ_V_I]]
int8x16_t test_vqnegq_s8(int8x16_t a) {
return vqnegq_s8(a);
}
-// CHECK-LABEL: test_vqnegq_s16
-// CHECK: vqneg.s16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> [[VQNEGQ_V_I]]) #4
+// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP1]]
int16x8_t test_vqnegq_s16(int16x8_t a) {
return vqnegq_s16(a);
}
-// CHECK-LABEL: test_vqnegq_s32
-// CHECK: vqneg.s32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> [[VQNEGQ_V_I]]) #4
+// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vqnegq_s32(int32x4_t a) {
return vqnegq_s32(a);
}
-// CHECK-LABEL: test_vqrdmulh_s16
-// CHECK: vqrdmulh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4
+// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
return vqrdmulh_s16(a, b);
}
-// CHECK-LABEL: test_vqrdmulh_s32
-// CHECK: vqrdmulh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4
+// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
return vqrdmulh_s32(a, b);
}
-// CHECK-LABEL: test_vqrdmulhq_s16
-// CHECK: vqrdmulh.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4
+// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
return vqrdmulhq_s16(a, b);
}
-// CHECK-LABEL: test_vqrdmulhq_s32
-// CHECK: vqrdmulh.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4
+// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
return vqrdmulhq_s32(a, b);
}
-// CHECK-LABEL: test_vqrdmulh_lane_s16
-// CHECK: vqrdmulh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4
+// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) {
return vqrdmulh_lane_s16(a, b, 3);
}
-// CHECK-LABEL: test_vqrdmulh_lane_s32
-// CHECK: vqrdmulh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4
+// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t b) {
return vqrdmulh_lane_s32(a, b, 1);
}
-// CHECK-LABEL: test_vqrdmulhq_lane_s16
-// CHECK: vqrdmulh.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8>
+// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4
+// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
return vqrdmulhq_lane_s16(a, b, 3);
}
-// CHECK-LABEL: test_vqrdmulhq_lane_s32
-// CHECK: vqrdmulh.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
+// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4
+// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
return vqrdmulhq_lane_s32(a, b, 1);
}
-// CHECK-LABEL: test_vqrdmulh_n_s16
-// CHECK: vqrdmulh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_n_s16(<4 x i16> %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V4_I]]) #4
+// CHECK: [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V6_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
return vqrdmulh_n_s16(a, b);
}
-// CHECK-LABEL: test_vqrdmulh_n_s32
-// CHECK: vqrdmulh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_n_s32(<2 x i32> %a, i32 %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQRDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V2_I]]) #4
+// CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V4_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) {
return vqrdmulh_n_s32(a, b);
}
-// CHECK-LABEL: test_vqrdmulhq_n_s16
-// CHECK: vqrdmulh.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_n_s16(<8 x i16> %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
+// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
+// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
+// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
+// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
+// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V8_I]]) #4
+// CHECK: [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V10_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) {
return vqrdmulhq_n_s16(a, b);
}
-// CHECK-LABEL: test_vqrdmulhq_n_s32
-// CHECK: vqrdmulh.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
+// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
+// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
+// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
+// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V4_I]]) #4
+// CHECK: [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V6_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) {
return vqrdmulhq_n_s32(a, b);
}
-// CHECK-LABEL: test_vqrshl_s8
-// CHECK: vqrshl.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
return vqrshl_s8(a, b);
}
-// CHECK-LABEL: test_vqrshl_s16
-// CHECK: vqrshl.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
return vqrshl_s16(a, b);
}
-// CHECK-LABEL: test_vqrshl_s32
-// CHECK: vqrshl.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
return vqrshl_s32(a, b);
}
-// CHECK-LABEL: test_vqrshl_s64
-// CHECK: vqrshl.s64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vqrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
return vqrshl_s64(a, b);
}
-// CHECK-LABEL: test_vqrshl_u8
-// CHECK: vqrshl.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
return vqrshl_u8(a, b);
}
-// CHECK-LABEL: test_vqrshl_u16
-// CHECK: vqrshl.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
return vqrshl_u16(a, b);
}
-// CHECK-LABEL: test_vqrshl_u32
-// CHECK: vqrshl.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
return vqrshl_u32(a, b);
}
-// CHECK-LABEL: test_vqrshl_u64
-// CHECK: vqrshl.u64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vqrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
return vqrshl_u64(a, b);
}
-// CHECK-LABEL: test_vqrshlq_s8
-// CHECK: vqrshl.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vqrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
return vqrshlq_s8(a, b);
}
-// CHECK-LABEL: test_vqrshlq_s16
-// CHECK: vqrshl.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
return vqrshlq_s16(a, b);
}
-// CHECK-LABEL: test_vqrshlq_s32
-// CHECK: vqrshl.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
return vqrshlq_s32(a, b);
}
-// CHECK-LABEL: test_vqrshlq_s64
-// CHECK: vqrshl.s64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
return vqrshlq_s64(a, b);
}
-// CHECK-LABEL: test_vqrshlq_u8
-// CHECK: vqrshl.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vqrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
return vqrshlq_u8(a, b);
}
-// CHECK-LABEL: test_vqrshlq_u16
-// CHECK: vqrshl.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
return vqrshlq_u16(a, b);
}
-// CHECK-LABEL: test_vqrshlq_u32
-// CHECK: vqrshl.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
return vqrshlq_u32(a, b);
}
-// CHECK-LABEL: test_vqrshlq_u64
-// CHECK: vqrshl.u64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
return vqrshlq_u64(a, b);
}
-// CHECK-LABEL: test_vqrshrn_n_s16
-// CHECK: vqrshrn.s16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 1)
+// CHECK: ret <8 x i8> [[VQRSHRN_N]]1
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
return vqrshrn_n_s16(a, 1);
}
-// CHECK-LABEL: test_vqrshrn_n_s32
-// CHECK: vqrshrn.s32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 1)
+// CHECK: ret <4 x i16> [[VQRSHRN_N]]1
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
return vqrshrn_n_s32(a, 1);
}
-// CHECK-LABEL: test_vqrshrn_n_s64
-// CHECK: vqrshrn.s64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 1)
+// CHECK: ret <2 x i32> [[VQRSHRN_N]]1
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
return vqrshrn_n_s64(a, 1);
}
-// CHECK-LABEL: test_vqrshrn_n_u16
-// CHECK: vqrshrn.u16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 1)
+// CHECK: ret <8 x i8> [[VQRSHRN_N]]1
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
return vqrshrn_n_u16(a, 1);
}
-// CHECK-LABEL: test_vqrshrn_n_u32
-// CHECK: vqrshrn.u32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 1)
+// CHECK: ret <4 x i16> [[VQRSHRN_N]]1
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
return vqrshrn_n_u32(a, 1);
}
-// CHECK-LABEL: test_vqrshrn_n_u64
-// CHECK: vqrshrn.u64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 1)
+// CHECK: ret <2 x i32> [[VQRSHRN_N]]1
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
return vqrshrn_n_u64(a, 1);
}
-// CHECK-LABEL: test_vqrshrun_n_s16
-// CHECK: vqrshrun.s16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 1)
+// CHECK: ret <8 x i8> [[VQRSHRUN_N]]1
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
return vqrshrun_n_s16(a, 1);
}
-// CHECK-LABEL: test_vqrshrun_n_s32
-// CHECK: vqrshrun.s32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 1)
+// CHECK: ret <4 x i16> [[VQRSHRUN_N]]1
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
return vqrshrun_n_s32(a, 1);
}
-// CHECK-LABEL: test_vqrshrun_n_s64
-// CHECK: vqrshrun.s64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 1)
+// CHECK: ret <2 x i32> [[VQRSHRUN_N]]1
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
return vqrshrun_n_s64(a, 1);
}
-// CHECK-LABEL: test_vqshl_s8
-// CHECK: vqshl.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
return vqshl_s8(a, b);
}
-// CHECK-LABEL: test_vqshl_s16
-// CHECK: vqshl.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
return vqshl_s16(a, b);
}
-// CHECK-LABEL: test_vqshl_s32
-// CHECK: vqshl.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
return vqshl_s32(a, b);
}
-// CHECK-LABEL: test_vqshl_s64
-// CHECK: vqshl.s64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vqshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
return vqshl_s64(a, b);
}
-// CHECK-LABEL: test_vqshl_u8
-// CHECK: vqshl.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
return vqshl_u8(a, b);
}
-// CHECK-LABEL: test_vqshl_u16
-// CHECK: vqshl.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
return vqshl_u16(a, b);
}
-// CHECK-LABEL: test_vqshl_u32
-// CHECK: vqshl.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
return vqshl_u32(a, b);
}
-// CHECK-LABEL: test_vqshl_u64
-// CHECK: vqshl.u64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vqshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
return vqshl_u64(a, b);
}
-// CHECK-LABEL: test_vqshlq_s8
-// CHECK: vqshl.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vqshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
return vqshlq_s8(a, b);
}
-// CHECK-LABEL: test_vqshlq_s16
-// CHECK: vqshl.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
return vqshlq_s16(a, b);
}
-// CHECK-LABEL: test_vqshlq_s32
-// CHECK: vqshl.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
return vqshlq_s32(a, b);
}
-// CHECK-LABEL: test_vqshlq_s64
-// CHECK: vqshl.s64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
return vqshlq_s64(a, b);
}
-// CHECK-LABEL: test_vqshlq_u8
-// CHECK: vqshl.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vqshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
return vqshlq_u8(a, b);
}
-// CHECK-LABEL: test_vqshlq_u16
-// CHECK: vqshl.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
return vqshlq_u16(a, b);
}
-// CHECK-LABEL: test_vqshlq_u32
-// CHECK: vqshl.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
return vqshlq_u32(a, b);
}
-// CHECK-LABEL: test_vqshlq_u64
-// CHECK: vqshl.u64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
return vqshlq_u64(a, b);
}
-// CHECK-LABEL: test_vqshlu_n_s8
-// CHECK: vqshlu.s8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) #0 {
+// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+// CHECK: ret <8 x i8> [[VQSHLU_N]]
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
return vqshlu_n_s8(a, 1);
}
-// CHECK-LABEL: test_vqshlu_n_s16
-// CHECK: vqshlu.s16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+// CHECK: ret <4 x i16> [[VQSHLU_N]]1
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
return vqshlu_n_s16(a, 1);
}
-// CHECK-LABEL: test_vqshlu_n_s32
-// CHECK: vqshlu.s32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 1, i32 1>)
+// CHECK: ret <2 x i32> [[VQSHLU_N]]1
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
return vqshlu_n_s32(a, 1);
}
-// CHECK-LABEL: test_vqshlu_n_s64
-// CHECK: vqshlu.s64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vqshlu_n_s64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
+// CHECK: ret <1 x i64> [[VQSHLU_N]]1
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
return vqshlu_n_s64(a, 1);
}
-// CHECK-LABEL: test_vqshluq_n_s8
-// CHECK: vqshlu.s8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) #0 {
+// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+// CHECK: ret <16 x i8> [[VQSHLU_N]]
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
return vqshluq_n_s8(a, 1);
}
-// CHECK-LABEL: test_vqshluq_n_s16
-// CHECK: vqshlu.s16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+// CHECK: ret <8 x i16> [[VQSHLU_N]]1
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
return vqshluq_n_s16(a, 1);
}
-// CHECK-LABEL: test_vqshluq_n_s32
-// CHECK: vqshlu.s32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+// CHECK: ret <4 x i32> [[VQSHLU_N]]1
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
return vqshluq_n_s32(a, 1);
}
-// CHECK-LABEL: test_vqshluq_n_s64
-// CHECK: vqshlu.s64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 1, i64 1>)
+// CHECK: ret <2 x i64> [[VQSHLU_N]]1
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
return vqshluq_n_s64(a, 1);
}
-// CHECK-LABEL: test_vqshl_n_s8
-// CHECK: vqshl.s8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) #0 {
+// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+// CHECK: ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
return vqshl_n_s8(a, 1);
}
-// CHECK-LABEL: test_vqshl_n_s16
-// CHECK: vqshl.s16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+// CHECK: ret <4 x i16> [[VQSHL_N]]1
int16x4_t test_vqshl_n_s16(int16x4_t a) {
return vqshl_n_s16(a, 1);
}
-// CHECK-LABEL: test_vqshl_n_s32
-// CHECK: vqshl.s32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
+// CHECK: ret <2 x i32> [[VQSHL_N]]1
int32x2_t test_vqshl_n_s32(int32x2_t a) {
return vqshl_n_s32(a, 1);
}
-// CHECK-LABEL: test_vqshl_n_s64
-// CHECK: vqshl.s64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vqshl_n_s64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
+// CHECK: ret <1 x i64> [[VQSHL_N]]1
int64x1_t test_vqshl_n_s64(int64x1_t a) {
return vqshl_n_s64(a, 1);
}
-// CHECK-LABEL: test_vqshl_n_u8
-// CHECK: vqshl.u8 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) #0 {
+// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+// CHECK: ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
return vqshl_n_u8(a, 1);
}
-// CHECK-LABEL: test_vqshl_n_u16
-// CHECK: vqshl.u16 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+// CHECK: ret <4 x i16> [[VQSHL_N]]1
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
return vqshl_n_u16(a, 1);
}
-// CHECK-LABEL: test_vqshl_n_u32
-// CHECK: vqshl.u32 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
+// CHECK: ret <2 x i32> [[VQSHL_N]]1
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
return vqshl_n_u32(a, 1);
}
-// CHECK-LABEL: test_vqshl_n_u64
-// CHECK: vqshl.u64 d{{[0-9]+}}, d{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vqshl_n_u64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
+// CHECK: ret <1 x i64> [[VQSHL_N]]1
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
return vqshl_n_u64(a, 1);
}
-// CHECK-LABEL: test_vqshlq_n_s8
-// CHECK: vqshl.s8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) #0 {
+// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+// CHECK: ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
return vqshlq_n_s8(a, 1);
}
-// CHECK-LABEL: test_vqshlq_n_s16
-// CHECK: vqshl.s16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+// CHECK: ret <8 x i16> [[VQSHL_N]]1
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
return vqshlq_n_s16(a, 1);
}
-// CHECK-LABEL: test_vqshlq_n_s32
-// CHECK: vqshl.s32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+// CHECK: ret <4 x i32> [[VQSHL_N]]1
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
return vqshlq_n_s32(a, 1);
}
-// CHECK-LABEL: test_vqshlq_n_s64
-// CHECK: vqshl.s64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
+// CHECK: ret <2 x i64> [[VQSHL_N]]1
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
return vqshlq_n_s64(a, 1);
}
-// CHECK-LABEL: test_vqshlq_n_u8
-// CHECK: vqshl.u8 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) #0 {
+// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+// CHECK: ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
return vqshlq_n_u8(a, 1);
}
-// CHECK-LABEL: test_vqshlq_n_u16
-// CHECK: vqshl.u16 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+// CHECK: ret <8 x i16> [[VQSHL_N]]1
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
return vqshlq_n_u16(a, 1);
}
-// CHECK-LABEL: test_vqshlq_n_u32
-// CHECK: vqshl.u32 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+// CHECK: ret <4 x i32> [[VQSHL_N]]1
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
return vqshlq_n_u32(a, 1);
}
-// CHECK-LABEL: test_vqshlq_n_u64
-// CHECK: vqshl.u64 q{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
+// CHECK: ret <2 x i64> [[VQSHL_N]]1
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
return vqshlq_n_u64(a, 1);
}
-// CHECK-LABEL: test_vqshrn_n_s16
-// CHECK: vqshrn.s16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 1)
+// CHECK: ret <8 x i8> [[VQSHRN_N]]1
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
return vqshrn_n_s16(a, 1);
}
-// CHECK-LABEL: test_vqshrn_n_s32
-// CHECK: vqshrn.s32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 1)
+// CHECK: ret <4 x i16> [[VQSHRN_N]]1
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
return vqshrn_n_s32(a, 1);
}
-// CHECK-LABEL: test_vqshrn_n_s64
-// CHECK: vqshrn.s64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 1)
+// CHECK: ret <2 x i32> [[VQSHRN_N]]1
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
return vqshrn_n_s64(a, 1);
}
-// CHECK-LABEL: test_vqshrn_n_u16
-// CHECK: vqshrn.u16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 1)
+// CHECK: ret <8 x i8> [[VQSHRN_N]]1
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
return vqshrn_n_u16(a, 1);
}
-// CHECK-LABEL: test_vqshrn_n_u32
-// CHECK: vqshrn.u32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 1)
+// CHECK: ret <4 x i16> [[VQSHRN_N]]1
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
return vqshrn_n_u32(a, 1);
}
-// CHECK-LABEL: test_vqshrn_n_u64
-// CHECK: vqshrn.u64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 1)
+// CHECK: ret <2 x i32> [[VQSHRN_N]]1
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
return vqshrn_n_u64(a, 1);
}
-// CHECK-LABEL: test_vqshrun_n_s16
-// CHECK: vqshrun.s16 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 1)
+// CHECK: ret <8 x i8> [[VQSHRUN_N]]1
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
return vqshrun_n_s16(a, 1);
}
-// CHECK-LABEL: test_vqshrun_n_s32
-// CHECK: vqshrun.s32 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 1)
+// CHECK: ret <4 x i16> [[VQSHRUN_N]]1
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
return vqshrun_n_s32(a, 1);
}
-// CHECK-LABEL: test_vqshrun_n_s64
-// CHECK: vqshrun.s64 d{{[0-9]+}}, q{{[0-9]+}}, #{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 1)
+// CHECK: ret <2 x i32> [[VQSHRUN_N]]1
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
return vqshrun_n_s64(a, 1);
}
-// CHECK-LABEL: test_vqsub_s8
-// CHECK: vqsub.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqsub_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
return vqsub_s8(a, b);
}
-// CHECK-LABEL: test_vqsub_s16
-// CHECK: vqsub.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqsub_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
return vqsub_s16(a, b);
}
-// CHECK-LABEL: test_vqsub_s32
-// CHECK: vqsub.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqsub_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
return vqsub_s32(a, b);
}
-// CHECK-LABEL: test_vqsub_s64
-// CHECK: vqsub.s64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vqsub_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
return vqsub_s64(a, b);
}
-// CHECK-LABEL: test_vqsub_u8
-// CHECK: vqsub.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vqsub_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
return vqsub_u8(a, b);
}
-// CHECK-LABEL: test_vqsub_u16
-// CHECK: vqsub.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vqsub_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
return vqsub_u16(a, b);
}
-// CHECK-LABEL: test_vqsub_u32
-// CHECK: vqsub.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vqsub_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
return vqsub_u32(a, b);
}
-// CHECK-LABEL: test_vqsub_u64
-// CHECK: vqsub.u64 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <1 x i64> @test_vqsub_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
return vqsub_u64(a, b);
}
-// CHECK-LABEL: test_vqsubq_s8
-// CHECK: vqsub.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vqsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
return vqsubq_s8(a, b);
}
-// CHECK-LABEL: test_vqsubq_s16
-// CHECK: vqsub.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
+// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
return vqsubq_s16(a, b);
}
-// CHECK-LABEL: test_vqsubq_s32
-// CHECK: vqsub.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
+// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
return vqsubq_s32(a, b);
}
-// CHECK-LABEL: test_vqsubq_s64
-// CHECK: vqsub.s64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4
+// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
return vqsubq_s64(a, b);
}
-// CHECK-LABEL: test_vqsubq_u8
-// CHECK: vqsub.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vqsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
return vqsubq_u8(a, b);
}
-// CHECK-LABEL: test_vqsubq_u16
-// CHECK: vqsub.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vqsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
+// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
return vqsubq_u16(a, b);
}
-// CHECK-LABEL: test_vqsubq_u32
-// CHECK: vqsub.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vqsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
+// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
return vqsubq_u32(a, b);
}
-// CHECK-LABEL: test_vqsubq_u64
-// CHECK: vqsub.u64 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i64> @test_vqsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4
+// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
return vqsubq_u64(a, b);
}
-// CHECK-LABEL: test_vraddhn_s16
-// CHECK: vraddhn.i16 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
+// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
return vraddhn_s16(a, b);
}
-// CHECK-LABEL: test_vraddhn_s32
-// CHECK: vraddhn.i32 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
+// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
return vraddhn_s32(a, b);
}
-// CHECK-LABEL: test_vraddhn_s64
-// CHECK: vraddhn.i64 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
+// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
return vraddhn_s64(a, b);
}
-// CHECK-LABEL: test_vraddhn_u16
-// CHECK: vraddhn.i16 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
+// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
return vraddhn_u16(a, b);
}
-// CHECK-LABEL: test_vraddhn_u32
-// CHECK: vraddhn.i32 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
+// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
return vraddhn_u32(a, b);
}
-// CHECK-LABEL: test_vraddhn_u64
-// CHECK: vraddhn.i64 d{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
+// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
return vraddhn_u64(a, b);
}
-// CHECK-LABEL: test_vrecpe_f32
-// CHECK: vrecpe.f32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> [[VRECPE_V_I]]) #4
+// CHECK: ret <2 x float> [[VRECPE_V1_I]]
float32x2_t test_vrecpe_f32(float32x2_t a) {
return vrecpe_f32(a);
}
-// CHECK-LABEL: test_vrecpe_u32
-// CHECK: vrecpe.u32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> [[VRECPE_V_I]]) #4
+// CHECK: ret <2 x i32> [[VRECPE_V1_I]]
uint32x2_t test_vrecpe_u32(uint32x2_t a) {
return vrecpe_u32(a);
}
-// CHECK-LABEL: test_vrecpeq_f32
-// CHECK: vrecpe.f32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> [[VRECPEQ_V_I]]) #4
+// CHECK: ret <4 x float> [[VRECPEQ_V1_I]]
float32x4_t test_vrecpeq_f32(float32x4_t a) {
return vrecpeq_f32(a);
}
-// CHECK-LABEL: test_vrecpeq_u32
-// CHECK: vrecpe.u32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> [[VRECPEQ_V_I]]) #4
+// CHECK: ret <4 x i32> [[VRECPEQ_V1_I]]
uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
return vrecpeq_u32(a);
}
-// CHECK-LABEL: test_vrecps_f32
-// CHECK: vrecps.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vrecps_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> [[VRECPS_V_I]], <2 x float> [[VRECPS_V1_I]]) #4
+// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <2 x float>
+// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vrecps_f32(float32x2_t a, float32x2_t b) {
return vrecps_f32(a, b);
}
-// CHECK-LABEL: test_vrecpsq_f32
-// CHECK: vrecps.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vrecpsq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> [[VRECPSQ_V_I]], <4 x float> [[VRECPSQ_V1_I]]) #4
+// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <4 x float>
+// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vrecpsq_f32(float32x4_t a, float32x4_t b) {
return vrecpsq_f32(a, b);
}
-// CHECK-LABEL: test_vreinterpret_s8_s16
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
return vreinterpret_s8_s16(a);
}
-// CHECK-LABEL: test_vreinterpret_s8_s32
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
return vreinterpret_s8_s32(a);
}
-// CHECK-LABEL: test_vreinterpret_s8_s64
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
return vreinterpret_s8_s64(a);
}
-// CHECK-LABEL: test_vreinterpret_s8_u8
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u8(<8 x i8> %a) #0 {
+// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
return vreinterpret_s8_u8(a);
}
-// CHECK-LABEL: test_vreinterpret_s8_u16
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
return vreinterpret_s8_u16(a);
}
-// CHECK-LABEL: test_vreinterpret_s8_u32
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
return vreinterpret_s8_u32(a);
}
-// CHECK-LABEL: test_vreinterpret_s8_u64
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
return vreinterpret_s8_u64(a);
}
-// CHECK-LABEL: test_vreinterpret_s8_f16
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f16(<4 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
return vreinterpret_s8_f16(a);
}
-// CHECK-LABEL: test_vreinterpret_s8_f32
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
return vreinterpret_s8_f32(a);
}
-// CHECK-LABEL: test_vreinterpret_s8_p8
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p8(<8 x i8> %a) #0 {
+// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
return vreinterpret_s8_p8(a);
}
-// CHECK-LABEL: test_vreinterpret_s8_p16
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
return vreinterpret_s8_p16(a);
}
-// CHECK-LABEL: test_vreinterpret_s16_s8
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
return vreinterpret_s16_s8(a);
}
-// CHECK-LABEL: test_vreinterpret_s16_s32
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
return vreinterpret_s16_s32(a);
}
-// CHECK-LABEL: test_vreinterpret_s16_s64
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
return vreinterpret_s16_s64(a);
}
-// CHECK-LABEL: test_vreinterpret_s16_u8
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
return vreinterpret_s16_u8(a);
}
-// CHECK-LABEL: test_vreinterpret_s16_u16
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u16(<4 x i16> %a) #0 {
+// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
return vreinterpret_s16_u16(a);
}
-// CHECK-LABEL: test_vreinterpret_s16_u32
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
return vreinterpret_s16_u32(a);
}
-// CHECK-LABEL: test_vreinterpret_s16_u64
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
return vreinterpret_s16_u64(a);
}
-// CHECK-LABEL: test_vreinterpret_s16_f16
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f16(<4 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
return vreinterpret_s16_f16(a);
}
-// CHECK-LABEL: test_vreinterpret_s16_f32
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
return vreinterpret_s16_f32(a);
}
-// CHECK-LABEL: test_vreinterpret_s16_p8
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
return vreinterpret_s16_p8(a);
}
-// CHECK-LABEL: test_vreinterpret_s16_p16
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p16(<4 x i16> %a) #0 {
+// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
return vreinterpret_s16_p16(a);
}
-// CHECK-LABEL: test_vreinterpret_s32_s8
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
return vreinterpret_s32_s8(a);
}
-// CHECK-LABEL: test_vreinterpret_s32_s16
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
return vreinterpret_s32_s16(a);
}
-// CHECK-LABEL: test_vreinterpret_s32_s64
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
return vreinterpret_s32_s64(a);
}
-// CHECK-LABEL: test_vreinterpret_s32_u8
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
return vreinterpret_s32_u8(a);
}
-// CHECK-LABEL: test_vreinterpret_s32_u16
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
return vreinterpret_s32_u16(a);
}
-// CHECK-LABEL: test_vreinterpret_s32_u32
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u32(<2 x i32> %a) #0 {
+// CHECK: ret <2 x i32> %a
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
return vreinterpret_s32_u32(a);
}
-// CHECK-LABEL: test_vreinterpret_s32_u64
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
return vreinterpret_s32_u64(a);
}
-// CHECK-LABEL: test_vreinterpret_s32_f16
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f16(<4 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
return vreinterpret_s32_f16(a);
}
-// CHECK-LABEL: test_vreinterpret_s32_f32
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
return vreinterpret_s32_f32(a);
}
-// CHECK-LABEL: test_vreinterpret_s32_p8
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
return vreinterpret_s32_p8(a);
}
-// CHECK-LABEL: test_vreinterpret_s32_p16
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
return vreinterpret_s32_p16(a);
}
-// CHECK-LABEL: test_vreinterpret_s64_s8
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
return vreinterpret_s64_s8(a);
}
-// CHECK-LABEL: test_vreinterpret_s64_s16
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
return vreinterpret_s64_s16(a);
}
-// CHECK-LABEL: test_vreinterpret_s64_s32
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
return vreinterpret_s64_s32(a);
}
-// CHECK-LABEL: test_vreinterpret_s64_u8
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
return vreinterpret_s64_u8(a);
}
-// CHECK-LABEL: test_vreinterpret_s64_u16
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
return vreinterpret_s64_u16(a);
}
-// CHECK-LABEL: test_vreinterpret_s64_u32
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
return vreinterpret_s64_u32(a);
}
-// CHECK-LABEL: test_vreinterpret_s64_u64
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u64(<1 x i64> %a) #0 {
+// CHECK: ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
return vreinterpret_s64_u64(a);
}
-// CHECK-LABEL: test_vreinterpret_s64_f16
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f16(<4 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
return vreinterpret_s64_f16(a);
}
-// CHECK-LABEL: test_vreinterpret_s64_f32
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
return vreinterpret_s64_f32(a);
}
-// CHECK-LABEL: test_vreinterpret_s64_p8
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
return vreinterpret_s64_p8(a);
}
-// CHECK-LABEL: test_vreinterpret_s64_p16
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
return vreinterpret_s64_p16(a);
}
-// CHECK-LABEL: test_vreinterpret_u8_s8
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s8(<8 x i8> %a) #0 {
+// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
return vreinterpret_u8_s8(a);
}
-// CHECK-LABEL: test_vreinterpret_u8_s16
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
return vreinterpret_u8_s16(a);
}
-// CHECK-LABEL: test_vreinterpret_u8_s32
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
return vreinterpret_u8_s32(a);
}
-// CHECK-LABEL: test_vreinterpret_u8_s64
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
return vreinterpret_u8_s64(a);
}
-// CHECK-LABEL: test_vreinterpret_u8_u16
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
return vreinterpret_u8_u16(a);
}
-// CHECK-LABEL: test_vreinterpret_u8_u32
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
return vreinterpret_u8_u32(a);
}
-// CHECK-LABEL: test_vreinterpret_u8_u64
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
return vreinterpret_u8_u64(a);
}
-// CHECK-LABEL: test_vreinterpret_u8_f16
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f16(<4 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
return vreinterpret_u8_f16(a);
}
-// CHECK-LABEL: test_vreinterpret_u8_f32
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
return vreinterpret_u8_f32(a);
}
-// CHECK-LABEL: test_vreinterpret_u8_p8
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p8(<8 x i8> %a) #0 {
+// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
return vreinterpret_u8_p8(a);
}
-// CHECK-LABEL: test_vreinterpret_u8_p16
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
return vreinterpret_u8_p16(a);
}
-// CHECK-LABEL: test_vreinterpret_u16_s8
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
return vreinterpret_u16_s8(a);
}
-// CHECK-LABEL: test_vreinterpret_u16_s16
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s16(<4 x i16> %a) #0 {
+// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
return vreinterpret_u16_s16(a);
}
-// CHECK-LABEL: test_vreinterpret_u16_s32
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
return vreinterpret_u16_s32(a);
}
-// CHECK-LABEL: test_vreinterpret_u16_s64
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
return vreinterpret_u16_s64(a);
}
-// CHECK-LABEL: test_vreinterpret_u16_u8
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
return vreinterpret_u16_u8(a);
}
-// CHECK-LABEL: test_vreinterpret_u16_u32
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
return vreinterpret_u16_u32(a);
}
-// CHECK-LABEL: test_vreinterpret_u16_u64
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
return vreinterpret_u16_u64(a);
}
-// CHECK-LABEL: test_vreinterpret_u16_f16
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f16(<4 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
return vreinterpret_u16_f16(a);
}
-// CHECK-LABEL: test_vreinterpret_u16_f32
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
return vreinterpret_u16_f32(a);
}
-// CHECK-LABEL: test_vreinterpret_u16_p8
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
return vreinterpret_u16_p8(a);
}
-// CHECK-LABEL: test_vreinterpret_u16_p16
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p16(<4 x i16> %a) #0 {
+// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
return vreinterpret_u16_p16(a);
}
-// CHECK-LABEL: test_vreinterpret_u32_s8
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
return vreinterpret_u32_s8(a);
}
-// CHECK-LABEL: test_vreinterpret_u32_s16
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
return vreinterpret_u32_s16(a);
}
-// CHECK-LABEL: test_vreinterpret_u32_s32
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s32(<2 x i32> %a) #0 {
+// CHECK: ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
return vreinterpret_u32_s32(a);
}
-// CHECK-LABEL: test_vreinterpret_u32_s64
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
return vreinterpret_u32_s64(a);
}
-// CHECK-LABEL: test_vreinterpret_u32_u8
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
return vreinterpret_u32_u8(a);
}
-// CHECK-LABEL: test_vreinterpret_u32_u16
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
return vreinterpret_u32_u16(a);
}
-// CHECK-LABEL: test_vreinterpret_u32_u64
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
return vreinterpret_u32_u64(a);
}
-// CHECK-LABEL: test_vreinterpret_u32_f16
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f16(<4 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
return vreinterpret_u32_f16(a);
}
-// CHECK-LABEL: test_vreinterpret_u32_f32
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
return vreinterpret_u32_f32(a);
}
-// CHECK-LABEL: test_vreinterpret_u32_p8
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
return vreinterpret_u32_p8(a);
}
-// CHECK-LABEL: test_vreinterpret_u32_p16
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
return vreinterpret_u32_p16(a);
}
-// CHECK-LABEL: test_vreinterpret_u64_s8
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
return vreinterpret_u64_s8(a);
}
-// CHECK-LABEL: test_vreinterpret_u64_s16
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
return vreinterpret_u64_s16(a);
}
-// CHECK-LABEL: test_vreinterpret_u64_s32
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
return vreinterpret_u64_s32(a);
}
-// CHECK-LABEL: test_vreinterpret_u64_s64
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s64(<1 x i64> %a) #0 {
+// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
return vreinterpret_u64_s64(a);
}
-// CHECK-LABEL: test_vreinterpret_u64_u8
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
return vreinterpret_u64_u8(a);
}
-// CHECK-LABEL: test_vreinterpret_u64_u16
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
return vreinterpret_u64_u16(a);
}
-// CHECK-LABEL: test_vreinterpret_u64_u32
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
return vreinterpret_u64_u32(a);
}
-// CHECK-LABEL: test_vreinterpret_u64_f16
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f16(<4 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
return vreinterpret_u64_f16(a);
}
-// CHECK-LABEL: test_vreinterpret_u64_f32
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
return vreinterpret_u64_f32(a);
}
-// CHECK-LABEL: test_vreinterpret_u64_p8
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
return vreinterpret_u64_p8(a);
}
-// CHECK-LABEL: test_vreinterpret_u64_p16
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
return vreinterpret_u64_p16(a);
}
-// CHECK-LABEL: test_vreinterpret_f16_s8
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
+// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
return vreinterpret_f16_s8(a);
}
-// CHECK-LABEL: test_vreinterpret_f16_s16
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
+// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
return vreinterpret_f16_s16(a);
}
-// CHECK-LABEL: test_vreinterpret_f16_s32
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
+// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
return vreinterpret_f16_s32(a);
}
-// CHECK-LABEL: test_vreinterpret_f16_s64
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
+// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
return vreinterpret_f16_s64(a);
}
-// CHECK-LABEL: test_vreinterpret_f16_u8
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
+// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
return vreinterpret_f16_u8(a);
}
-// CHECK-LABEL: test_vreinterpret_f16_u16
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
+// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
return vreinterpret_f16_u16(a);
}
-// CHECK-LABEL: test_vreinterpret_f16_u32
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
+// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
return vreinterpret_f16_u32(a);
}
-// CHECK-LABEL: test_vreinterpret_f16_u64
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
+// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
return vreinterpret_f16_u64(a);
}
-// CHECK-LABEL: test_vreinterpret_f16_f32
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
+// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
return vreinterpret_f16_f32(a);
}
-// CHECK-LABEL: test_vreinterpret_f16_p8
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
+// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
return vreinterpret_f16_p8(a);
}
-// CHECK-LABEL: test_vreinterpret_f16_p16
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
+// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
return vreinterpret_f16_p16(a);
}
-// CHECK-LABEL: test_vreinterpret_f32_s8
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
+// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
return vreinterpret_f32_s8(a);
}
-// CHECK-LABEL: test_vreinterpret_f32_s16
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
+// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
return vreinterpret_f32_s16(a);
}
-// CHECK-LABEL: test_vreinterpret_f32_s32
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
+// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
return vreinterpret_f32_s32(a);
}
-// CHECK-LABEL: test_vreinterpret_f32_s64
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
+// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
return vreinterpret_f32_s64(a);
}
-// CHECK-LABEL: test_vreinterpret_f32_u8
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
+// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
return vreinterpret_f32_u8(a);
}
-// CHECK-LABEL: test_vreinterpret_f32_u16
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
+// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
return vreinterpret_f32_u16(a);
}
-// CHECK-LABEL: test_vreinterpret_f32_u32
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
+// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
return vreinterpret_f32_u32(a);
}
-// CHECK-LABEL: test_vreinterpret_f32_u64
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
+// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
return vreinterpret_f32_u64(a);
}
-// CHECK-LABEL: test_vreinterpret_f32_f16
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f16(<4 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
+// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
return vreinterpret_f32_f16(a);
}
-// CHECK-LABEL: test_vreinterpret_f32_p8
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
+// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
return vreinterpret_f32_p8(a);
}
-// CHECK-LABEL: test_vreinterpret_f32_p16
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
+// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
return vreinterpret_f32_p16(a);
}
-// CHECK-LABEL: test_vreinterpret_p8_s8
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s8(<8 x i8> %a) #0 {
+// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
return vreinterpret_p8_s8(a);
}
-// CHECK-LABEL: test_vreinterpret_p8_s16
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
return vreinterpret_p8_s16(a);
}
-// CHECK-LABEL: test_vreinterpret_p8_s32
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
return vreinterpret_p8_s32(a);
}
-// CHECK-LABEL: test_vreinterpret_p8_s64
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
return vreinterpret_p8_s64(a);
}
-// CHECK-LABEL: test_vreinterpret_p8_u8
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u8(<8 x i8> %a) #0 {
+// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
return vreinterpret_p8_u8(a);
}
-// CHECK-LABEL: test_vreinterpret_p8_u16
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
return vreinterpret_p8_u16(a);
}
-// CHECK-LABEL: test_vreinterpret_p8_u32
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
return vreinterpret_p8_u32(a);
}
-// CHECK-LABEL: test_vreinterpret_p8_u64
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
return vreinterpret_p8_u64(a);
}
-// CHECK-LABEL: test_vreinterpret_p8_f16
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f16(<4 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
return vreinterpret_p8_f16(a);
}
-// CHECK-LABEL: test_vreinterpret_p8_f32
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
return vreinterpret_p8_f32(a);
}
-// CHECK-LABEL: test_vreinterpret_p8_p16
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
return vreinterpret_p8_p16(a);
}
-// CHECK-LABEL: test_vreinterpret_p16_s8
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
return vreinterpret_p16_s8(a);
}
-// CHECK-LABEL: test_vreinterpret_p16_s16
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s16(<4 x i16> %a) #0 {
+// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
return vreinterpret_p16_s16(a);
}
-// CHECK-LABEL: test_vreinterpret_p16_s32
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
return vreinterpret_p16_s32(a);
}
-// CHECK-LABEL: test_vreinterpret_p16_s64
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
return vreinterpret_p16_s64(a);
}
-// CHECK-LABEL: test_vreinterpret_p16_u8
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
return vreinterpret_p16_u8(a);
}
-// CHECK-LABEL: test_vreinterpret_p16_u16
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u16(<4 x i16> %a) #0 {
+// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
return vreinterpret_p16_u16(a);
}
-// CHECK-LABEL: test_vreinterpret_p16_u32
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
return vreinterpret_p16_u32(a);
}
-// CHECK-LABEL: test_vreinterpret_p16_u64
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u64(<1 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
return vreinterpret_p16_u64(a);
}
-// CHECK-LABEL: test_vreinterpret_p16_f16
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f16(<4 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
return vreinterpret_p16_f16(a);
}
-// CHECK-LABEL: test_vreinterpret_p16_f32
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
return vreinterpret_p16_f32(a);
}
-// CHECK-LABEL: test_vreinterpret_p16_p8
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
return vreinterpret_p16_p8(a);
}
-// CHECK-LABEL: test_vreinterpretq_s8_s16
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
return vreinterpretq_s8_s16(a);
}
-// CHECK-LABEL: test_vreinterpretq_s8_s32
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
return vreinterpretq_s8_s32(a);
}
-// CHECK-LABEL: test_vreinterpretq_s8_s64
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
return vreinterpretq_s8_s64(a);
}
-// CHECK-LABEL: test_vreinterpretq_s8_u8
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u8(<16 x i8> %a) #0 {
+// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
return vreinterpretq_s8_u8(a);
}
-// CHECK-LABEL: test_vreinterpretq_s8_u16
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
return vreinterpretq_s8_u16(a);
}
-// CHECK-LABEL: test_vreinterpretq_s8_u32
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
return vreinterpretq_s8_u32(a);
}
-// CHECK-LABEL: test_vreinterpretq_s8_u64
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
return vreinterpretq_s8_u64(a);
}
-// CHECK-LABEL: test_vreinterpretq_s8_f16
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f16(<8 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
return vreinterpretq_s8_f16(a);
}
-// CHECK-LABEL: test_vreinterpretq_s8_f32
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
return vreinterpretq_s8_f32(a);
}
-// CHECK-LABEL: test_vreinterpretq_s8_p8
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p8(<16 x i8> %a) #0 {
+// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
return vreinterpretq_s8_p8(a);
}
-// CHECK-LABEL: test_vreinterpretq_s8_p16
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
return vreinterpretq_s8_p16(a);
}
-// CHECK-LABEL: test_vreinterpretq_s16_s8
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
return vreinterpretq_s16_s8(a);
}
-// CHECK-LABEL: test_vreinterpretq_s16_s32
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
return vreinterpretq_s16_s32(a);
}
-// CHECK-LABEL: test_vreinterpretq_s16_s64
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
return vreinterpretq_s16_s64(a);
}
-// CHECK-LABEL: test_vreinterpretq_s16_u8
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
return vreinterpretq_s16_u8(a);
}
-// CHECK-LABEL: test_vreinterpretq_s16_u16
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u16(<8 x i16> %a) #0 {
+// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
return vreinterpretq_s16_u16(a);
}
-// CHECK-LABEL: test_vreinterpretq_s16_u32
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
return vreinterpretq_s16_u32(a);
}
-// CHECK-LABEL: test_vreinterpretq_s16_u64
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
return vreinterpretq_s16_u64(a);
}
-// CHECK-LABEL: test_vreinterpretq_s16_f16
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f16(<8 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
return vreinterpretq_s16_f16(a);
}
-// CHECK-LABEL: test_vreinterpretq_s16_f32
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
return vreinterpretq_s16_f32(a);
}
-// CHECK-LABEL: test_vreinterpretq_s16_p8
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
return vreinterpretq_s16_p8(a);
}
-// CHECK-LABEL: test_vreinterpretq_s16_p16
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p16(<8 x i16> %a) #0 {
+// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
return vreinterpretq_s16_p16(a);
}
-// CHECK-LABEL: test_vreinterpretq_s32_s8
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
return vreinterpretq_s32_s8(a);
}
-// CHECK-LABEL: test_vreinterpretq_s32_s16
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
return vreinterpretq_s32_s16(a);
}
-// CHECK-LABEL: test_vreinterpretq_s32_s64
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
return vreinterpretq_s32_s64(a);
}
-// CHECK-LABEL: test_vreinterpretq_s32_u8
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
return vreinterpretq_s32_u8(a);
}
-// CHECK-LABEL: test_vreinterpretq_s32_u16
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
return vreinterpretq_s32_u16(a);
}
-// CHECK-LABEL: test_vreinterpretq_s32_u32
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u32(<4 x i32> %a) #0 {
+// CHECK: ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
return vreinterpretq_s32_u32(a);
}
-// CHECK-LABEL: test_vreinterpretq_s32_u64
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
return vreinterpretq_s32_u64(a);
}
-// CHECK-LABEL: test_vreinterpretq_s32_f16
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f16(<8 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
return vreinterpretq_s32_f16(a);
}
-// CHECK-LABEL: test_vreinterpretq_s32_f32
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
return vreinterpretq_s32_f32(a);
}
-// CHECK-LABEL: test_vreinterpretq_s32_p8
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
return vreinterpretq_s32_p8(a);
}
-// CHECK-LABEL: test_vreinterpretq_s32_p16
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
return vreinterpretq_s32_p16(a);
}
-// CHECK-LABEL: test_vreinterpretq_s64_s8
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
return vreinterpretq_s64_s8(a);
}
-// CHECK-LABEL: test_vreinterpretq_s64_s16
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
return vreinterpretq_s64_s16(a);
}
-// CHECK-LABEL: test_vreinterpretq_s64_s32
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
return vreinterpretq_s64_s32(a);
}
-// CHECK-LABEL: test_vreinterpretq_s64_u8
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
return vreinterpretq_s64_u8(a);
}
-// CHECK-LABEL: test_vreinterpretq_s64_u16
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
return vreinterpretq_s64_u16(a);
}
-// CHECK-LABEL: test_vreinterpretq_s64_u32
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
return vreinterpretq_s64_u32(a);
}
-// CHECK-LABEL: test_vreinterpretq_s64_u64
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u64(<2 x i64> %a) #0 {
+// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
return vreinterpretq_s64_u64(a);
}
-// CHECK-LABEL: test_vreinterpretq_s64_f16
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f16(<8 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
return vreinterpretq_s64_f16(a);
}
-// CHECK-LABEL: test_vreinterpretq_s64_f32
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
return vreinterpretq_s64_f32(a);
}
-// CHECK-LABEL: test_vreinterpretq_s64_p8
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
return vreinterpretq_s64_p8(a);
}
-// CHECK-LABEL: test_vreinterpretq_s64_p16
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
return vreinterpretq_s64_p16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u8_s8
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s8(<16 x i8> %a) #0 {
+// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
return vreinterpretq_u8_s8(a);
}
-// CHECK-LABEL: test_vreinterpretq_u8_s16
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
return vreinterpretq_u8_s16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u8_s32
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
return vreinterpretq_u8_s32(a);
}
-// CHECK-LABEL: test_vreinterpretq_u8_s64
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
return vreinterpretq_u8_s64(a);
}
-// CHECK-LABEL: test_vreinterpretq_u8_u16
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
return vreinterpretq_u8_u16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u8_u32
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
return vreinterpretq_u8_u32(a);
}
-// CHECK-LABEL: test_vreinterpretq_u8_u64
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
return vreinterpretq_u8_u64(a);
}
-// CHECK-LABEL: test_vreinterpretq_u8_f16
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f16(<8 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
return vreinterpretq_u8_f16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u8_f32
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
return vreinterpretq_u8_f32(a);
}
-// CHECK-LABEL: test_vreinterpretq_u8_p8
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p8(<16 x i8> %a) #0 {
+// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
return vreinterpretq_u8_p8(a);
}
-// CHECK-LABEL: test_vreinterpretq_u8_p16
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
return vreinterpretq_u8_p16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u16_s8
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
return vreinterpretq_u16_s8(a);
}
-// CHECK-LABEL: test_vreinterpretq_u16_s16
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s16(<8 x i16> %a) #0 {
+// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
return vreinterpretq_u16_s16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u16_s32
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
return vreinterpretq_u16_s32(a);
}
-// CHECK-LABEL: test_vreinterpretq_u16_s64
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
return vreinterpretq_u16_s64(a);
}
-// CHECK-LABEL: test_vreinterpretq_u16_u8
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
return vreinterpretq_u16_u8(a);
}
-// CHECK-LABEL: test_vreinterpretq_u16_u32
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
return vreinterpretq_u16_u32(a);
}
-// CHECK-LABEL: test_vreinterpretq_u16_u64
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
return vreinterpretq_u16_u64(a);
}
-// CHECK-LABEL: test_vreinterpretq_u16_f16
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f16(<8 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
return vreinterpretq_u16_f16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u16_f32
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
return vreinterpretq_u16_f32(a);
}
-// CHECK-LABEL: test_vreinterpretq_u16_p8
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
return vreinterpretq_u16_p8(a);
}
-// CHECK-LABEL: test_vreinterpretq_u16_p16
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p16(<8 x i16> %a) #0 {
+// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
return vreinterpretq_u16_p16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u32_s8
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
return vreinterpretq_u32_s8(a);
}
-// CHECK-LABEL: test_vreinterpretq_u32_s16
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
return vreinterpretq_u32_s16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u32_s32
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s32(<4 x i32> %a) #0 {
+// CHECK: ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
return vreinterpretq_u32_s32(a);
}
-// CHECK-LABEL: test_vreinterpretq_u32_s64
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
return vreinterpretq_u32_s64(a);
}
-// CHECK-LABEL: test_vreinterpretq_u32_u8
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
return vreinterpretq_u32_u8(a);
}
-// CHECK-LABEL: test_vreinterpretq_u32_u16
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
return vreinterpretq_u32_u16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u32_u64
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
return vreinterpretq_u32_u64(a);
}
-// CHECK-LABEL: test_vreinterpretq_u32_f16
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f16(<8 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
return vreinterpretq_u32_f16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u32_f32
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
return vreinterpretq_u32_f32(a);
}
-// CHECK-LABEL: test_vreinterpretq_u32_p8
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
return vreinterpretq_u32_p8(a);
}
-// CHECK-LABEL: test_vreinterpretq_u32_p16
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
return vreinterpretq_u32_p16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u64_s8
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
return vreinterpretq_u64_s8(a);
}
-// CHECK-LABEL: test_vreinterpretq_u64_s16
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
return vreinterpretq_u64_s16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u64_s32
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
return vreinterpretq_u64_s32(a);
}
-// CHECK-LABEL: test_vreinterpretq_u64_s64
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s64(<2 x i64> %a) #0 {
+// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
return vreinterpretq_u64_s64(a);
}
-// CHECK-LABEL: test_vreinterpretq_u64_u8
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
return vreinterpretq_u64_u8(a);
}
-// CHECK-LABEL: test_vreinterpretq_u64_u16
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
return vreinterpretq_u64_u16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u64_u32
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
return vreinterpretq_u64_u32(a);
}
-// CHECK-LABEL: test_vreinterpretq_u64_f16
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f16(<8 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
return vreinterpretq_u64_f16(a);
}
-// CHECK-LABEL: test_vreinterpretq_u64_f32
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
return vreinterpretq_u64_f32(a);
}
-// CHECK-LABEL: test_vreinterpretq_u64_p8
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
return vreinterpretq_u64_p8(a);
}
-// CHECK-LABEL: test_vreinterpretq_u64_p16
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
return vreinterpretq_u64_p16(a);
}
-// CHECK-LABEL: test_vreinterpretq_f16_s8
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
+// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
return vreinterpretq_f16_s8(a);
}
-// CHECK-LABEL: test_vreinterpretq_f16_s16
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
+// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
return vreinterpretq_f16_s16(a);
}
-// CHECK-LABEL: test_vreinterpretq_f16_s32
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
+// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
return vreinterpretq_f16_s32(a);
}
-// CHECK-LABEL: test_vreinterpretq_f16_s64
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
+// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
return vreinterpretq_f16_s64(a);
}
-// CHECK-LABEL: test_vreinterpretq_f16_u8
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
+// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
return vreinterpretq_f16_u8(a);
}
-// CHECK-LABEL: test_vreinterpretq_f16_u16
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
+// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
return vreinterpretq_f16_u16(a);
}
-// CHECK-LABEL: test_vreinterpretq_f16_u32
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
+// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
return vreinterpretq_f16_u32(a);
}
-// CHECK-LABEL: test_vreinterpretq_f16_u64
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
+// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
return vreinterpretq_f16_u64(a);
}
-// CHECK-LABEL: test_vreinterpretq_f16_f32
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
+// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
return vreinterpretq_f16_f32(a);
}
-// CHECK-LABEL: test_vreinterpretq_f16_p8
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
+// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
return vreinterpretq_f16_p8(a);
}
-// CHECK-LABEL: test_vreinterpretq_f16_p16
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
+// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
return vreinterpretq_f16_p16(a);
}
-// CHECK-LABEL: test_vreinterpretq_f32_s8
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
+// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
return vreinterpretq_f32_s8(a);
}
-// CHECK-LABEL: test_vreinterpretq_f32_s16
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
+// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
return vreinterpretq_f32_s16(a);
}
-// CHECK-LABEL: test_vreinterpretq_f32_s32
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
+// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
return vreinterpretq_f32_s32(a);
}
-// CHECK-LABEL: test_vreinterpretq_f32_s64
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
+// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
return vreinterpretq_f32_s64(a);
}
-// CHECK-LABEL: test_vreinterpretq_f32_u8
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
+// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
return vreinterpretq_f32_u8(a);
}
-// CHECK-LABEL: test_vreinterpretq_f32_u16
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
+// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
return vreinterpretq_f32_u16(a);
}
-// CHECK-LABEL: test_vreinterpretq_f32_u32
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
+// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
return vreinterpretq_f32_u32(a);
}
-// CHECK-LABEL: test_vreinterpretq_f32_u64
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
+// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
return vreinterpretq_f32_u64(a);
}
-// CHECK-LABEL: test_vreinterpretq_f32_f16
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f16(<8 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
+// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
return vreinterpretq_f32_f16(a);
}
-// CHECK-LABEL: test_vreinterpretq_f32_p8
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
+// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
return vreinterpretq_f32_p8(a);
}
-// CHECK-LABEL: test_vreinterpretq_f32_p16
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
+// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
return vreinterpretq_f32_p16(a);
}
-// CHECK-LABEL: test_vreinterpretq_p8_s8
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s8(<16 x i8> %a) #0 {
+// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
return vreinterpretq_p8_s8(a);
}
-// CHECK-LABEL: test_vreinterpretq_p8_s16
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
return vreinterpretq_p8_s16(a);
}
-// CHECK-LABEL: test_vreinterpretq_p8_s32
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
return vreinterpretq_p8_s32(a);
}
-// CHECK-LABEL: test_vreinterpretq_p8_s64
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
return vreinterpretq_p8_s64(a);
}
-// CHECK-LABEL: test_vreinterpretq_p8_u8
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u8(<16 x i8> %a) #0 {
+// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
return vreinterpretq_p8_u8(a);
}
-// CHECK-LABEL: test_vreinterpretq_p8_u16
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
return vreinterpretq_p8_u16(a);
}
-// CHECK-LABEL: test_vreinterpretq_p8_u32
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
return vreinterpretq_p8_u32(a);
}
-// CHECK-LABEL: test_vreinterpretq_p8_u64
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
return vreinterpretq_p8_u64(a);
}
-// CHECK-LABEL: test_vreinterpretq_p8_f16
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f16(<8 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
return vreinterpretq_p8_f16(a);
}
-// CHECK-LABEL: test_vreinterpretq_p8_f32
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
return vreinterpretq_p8_f32(a);
}
-// CHECK-LABEL: test_vreinterpretq_p8_p16
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
return vreinterpretq_p8_p16(a);
}
-// CHECK-LABEL: test_vreinterpretq_p16_s8
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
return vreinterpretq_p16_s8(a);
}
-// CHECK-LABEL: test_vreinterpretq_p16_s16
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s16(<8 x i16> %a) #0 {
+// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
return vreinterpretq_p16_s16(a);
}
-// CHECK-LABEL: test_vreinterpretq_p16_s32
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
return vreinterpretq_p16_s32(a);
}
-// CHECK-LABEL: test_vreinterpretq_p16_s64
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
return vreinterpretq_p16_s64(a);
}
-// CHECK-LABEL: test_vreinterpretq_p16_u8
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
return vreinterpretq_p16_u8(a);
}
-// CHECK-LABEL: test_vreinterpretq_p16_u16
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u16(<8 x i16> %a) #0 {
+// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
return vreinterpretq_p16_u16(a);
}
-// CHECK-LABEL: test_vreinterpretq_p16_u32
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
return vreinterpretq_p16_u32(a);
}
-// CHECK-LABEL: test_vreinterpretq_p16_u64
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
return vreinterpretq_p16_u64(a);
}
-// CHECK-LABEL: test_vreinterpretq_p16_f16
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f16(<8 x half> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
return vreinterpretq_p16_f16(a);
}
-// CHECK-LABEL: test_vreinterpretq_p16_f32
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
return vreinterpretq_p16_f32(a);
}
-// CHECK-LABEL: test_vreinterpretq_p16_p8
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p8(<16 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
return vreinterpretq_p16_p8(a);
}
-// CHECK-LABEL: test_vrev16_s8
-// CHECK: vrev16.8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev16_s8(int8x8_t a) {
return vrev16_s8(a);
}
-// CHECK-LABEL: test_vrev16_u8
-// CHECK: vrev16.8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vrev16_u8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev16_u8(uint8x8_t a) {
return vrev16_u8(a);
}
-// CHECK-LABEL: test_vrev16_p8
-// CHECK: vrev16.8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vrev16_p8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev16_p8(poly8x8_t a) {
return vrev16_p8(a);
}
-// CHECK-LABEL: test_vrev16q_s8
-// CHECK: vrev16.8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev16q_s8(int8x16_t a) {
return vrev16q_s8(a);
}
-// CHECK-LABEL: test_vrev16q_u8
-// CHECK: vrev16.8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vrev16q_u8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev16q_u8(uint8x16_t a) {
return vrev16q_u8(a);
}
-// CHECK-LABEL: test_vrev16q_p8
-// CHECK: vrev16.8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vrev16q_p8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev16q_p8(poly8x16_t a) {
return vrev16q_p8(a);
}
-// CHECK-LABEL: test_vrev32_s8
-// CHECK: vrev32.8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev32_s8(int8x8_t a) {
return vrev32_s8(a);
}
-// CHECK-LABEL: test_vrev32_s16
-// CHECK: vrev32.16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev32_s16(int16x4_t a) {
return vrev32_s16(a);
}
-// CHECK-LABEL: test_vrev32_u8
-// CHECK: vrev32.8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vrev32_u8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev32_u8(uint8x8_t a) {
return vrev32_u8(a);
}
-// CHECK-LABEL: test_vrev32_u16
-// CHECK: vrev32.16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vrev32_u16(<4 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev32_u16(uint16x4_t a) {
return vrev32_u16(a);
}
-// CHECK-LABEL: test_vrev32_p8
-// CHECK: vrev32.8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vrev32_p8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev32_p8(poly8x8_t a) {
return vrev32_p8(a);
}
-// CHECK-LABEL: test_vrev32_p16
-// CHECK: vrev32.16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vrev32_p16(<4 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev32_p16(poly16x4_t a) {
return vrev32_p16(a);
}
-// CHECK-LABEL: test_vrev32q_s8
-// CHECK: vrev32.8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev32q_s8(int8x16_t a) {
return vrev32q_s8(a);
}
-// CHECK-LABEL: test_vrev32q_s16
-// CHECK: vrev32.16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev32q_s16(int16x8_t a) {
return vrev32q_s16(a);
}
-// CHECK-LABEL: test_vrev32q_u8
-// CHECK: vrev32.8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vrev32q_u8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev32q_u8(uint8x16_t a) {
return vrev32q_u8(a);
}
-// CHECK-LABEL: test_vrev32q_u16
-// CHECK: vrev32.16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vrev32q_u16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev32q_u16(uint16x8_t a) {
return vrev32q_u16(a);
}
-// CHECK-LABEL: test_vrev32q_p8
-// CHECK: vrev32.8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vrev32q_p8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev32q_p8(poly8x16_t a) {
return vrev32q_p8(a);
}
-// CHECK-LABEL: test_vrev32q_p16
-// CHECK: vrev32.16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vrev32q_p16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev32q_p16(poly16x8_t a) {
return vrev32q_p16(a);
}
-// CHECK-LABEL: test_vrev64_s8
-// CHECK: vrev64.8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev64_s8(int8x8_t a) {
return vrev64_s8(a);
}
-// CHECK-LABEL: test_vrev64_s16
-// CHECK: vrev64.16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev64_s16(int16x4_t a) {
return vrev64_s16(a);
}
-// CHECK-LABEL: test_vrev64_s32
-// CHECK: vrev64.32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
+// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vrev64_s32(int32x2_t a) {
return vrev64_s32(a);
}
-// CHECK-LABEL: test_vrev64_u8
-// CHECK: vrev64.8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vrev64_u8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev64_u8(uint8x8_t a) {
return vrev64_u8(a);
}
-// CHECK-LABEL: test_vrev64_u16
-// CHECK: vrev64.16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vrev64_u16(<4 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev64_u16(uint16x4_t a) {
return vrev64_u16(a);
}
-// CHECK-LABEL: test_vrev64_u32
-// CHECK: vrev64.32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vrev64_u32(<2 x i32> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
+// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vrev64_u32(uint32x2_t a) {
return vrev64_u32(a);
}
-// CHECK-LABEL: test_vrev64_p8
-// CHECK: vrev64.8 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vrev64_p8(<8 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev64_p8(poly8x8_t a) {
return vrev64_p8(a);
}
-// CHECK-LABEL: test_vrev64_p16
-// CHECK: vrev64.16 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vrev64_p16(<4 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev64_p16(poly16x4_t a) {
return vrev64_p16(a);
}
-// CHECK-LABEL: test_vrev64_f32
-// CHECK: vrev64.32 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 0>
+// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vrev64_f32(float32x2_t a) {
return vrev64_f32(a);
}
-// CHECK-LABEL: test_vrev64q_s8
-// CHECK: vrev64.8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev64q_s8(int8x16_t a) {
return vrev64q_s8(a);
}
-// CHECK-LABEL: test_vrev64q_s16
-// CHECK: vrev64.16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev64q_s16(int16x8_t a) {
return vrev64q_s16(a);
}
-// CHECK-LABEL: test_vrev64q_s32
-// CHECK: vrev64.32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrev64q_s32(int32x4_t a) {
return vrev64q_s32(a);
}
-// CHECK-LABEL: test_vrev64q_u8
-// CHECK: vrev64.8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vrev64q_u8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev64q_u8(uint8x16_t a) {
return vrev64q_u8(a);
}
-// CHECK-LABEL: test_vrev64q_u16
-// CHECK: vrev64.16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vrev64q_u16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev64q_u16(uint16x8_t a) {
return vrev64q_u16(a);
}
-// CHECK-LABEL: test_vrev64q_u32
-// CHECK: vrev64.32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vrev64q_u32(<4 x i32> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrev64q_u32(uint32x4_t a) {
return vrev64q_u32(a);
}
-// CHECK-LABEL: test_vrev64q_p8
-// CHECK: vrev64.8 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vrev64q_p8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev64q_p8(poly8x16_t a) {
return vrev64q_p8(a);
}
-// CHECK-LABEL: test_vrev64q_p16
-// CHECK: vrev64.16 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vrev64q_p16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev64q_p16(poly16x8_t a) {
return vrev64q_p16(a);
}
-// CHECK-LABEL: test_vrev64q_f32
-// CHECK: vrev64.32 q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK: ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vrev64q_f32(float32x4_t a) {
return vrev64q_f32(a);
}
-// CHECK-LABEL: test_vrhadd_s8
-// CHECK: vrhadd.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vrhadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t a, int8x8_t b) {
return vrhadd_s8(a, b);
}
-// CHECK-LABEL: test_vrhadd_s16
-// CHECK: vrhadd.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vrhadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
+// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrhadd_s16(int16x4_t a, int16x4_t b) {
return vrhadd_s16(a, b);
}
-// CHECK-LABEL: test_vrhadd_s32
-// CHECK: vrhadd.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vrhadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
+// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrhadd_s32(int32x2_t a, int32x2_t b) {
return vrhadd_s32(a, b);
}
-// CHECK-LABEL: test_vrhadd_u8
-// CHECK: vrhadd.u8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vrhadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t a, uint8x8_t b) {
return vrhadd_u8(a, b);
}
-// CHECK-LABEL: test_vrhadd_u16
-// CHECK: vrhadd.u16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vrhadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
+// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vrhadd_u16(uint16x4_t a, uint16x4_t b) {
return vrhadd_u16(a, b);
}
-// CHECK-LABEL: test_vrhadd_u32
-// CHECK: vrhadd.u32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vrhadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
+// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vrhadd_u32(uint32x2_t a, uint32x2_t b) {
return vrhadd_u32(a, b);
}
-// CHECK-LABEL: test_vrhaddq_s8
-// CHECK: vrhadd.s8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vrhaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
int8x16_t test_vrhaddq_s8(int8x16_t a, int8x16_t b) {
return vrhaddq_s8(a, b);
}
-// CHECK-LABEL: test_vrhaddq_s16
-// CHECK: vrhadd.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vrhaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
+// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b) {
return vrhaddq_s16(a, b);
}
-// CHECK-LABEL: test_vrhaddq_s32
-// CHECK: vrhadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vrhaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
+// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vrhaddq_s32(int32x4_t a, int32x4_t b) {
return vrhaddq_s32(a, b);
}
-// CHECK-LABEL: test_vrhaddq_u8
-// CHECK: vrhadd.u8 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <16 x i8> @test_vrhaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b) {
return vrhaddq_u8(a, b);
}
-// CHECK-LABEL: test_vrhaddq_u16
-// CHECK: vrhadd.u16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <8 x i16> @test_vrhaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
+// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vrhaddq_u16(uint16x8_t a, uint16x8_t b) {
return vrhaddq_u16(a, b);
}
-// CHECK-LABEL: test_vrhaddq_u32
-// CHECK: vrhadd.u32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-LABEL: define <4 x i32> @test_vrhaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
+// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b) {
return vrhaddq_u32(a, b);
}
-// CHECK-LABEL: test_vrshl_s8
-// CHECK: vrshl.s8 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <8 x i8> @test_vrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
return vrshl_s8(a, b);
}
-// CHECK-LABEL: test_vrshl_s16
-// CHECK: vrshl.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <4 x i16> @test_vrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
+// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
return vrshl_s16(a, b);
}
-// CHECK-LABEL: test_vrshl_s32
-// CHECK: vrshl.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-LABEL: define <2 x i32> @test_vrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
+// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
return vrshl_s32(a, b);
}
-// CHECK-LABEL: test_vrshl_s64
[... 9388 lines stripped ...]
More information about the cfe-commits
mailing list