r263048 - ARM & AArch64: convert asm tests to LLVM IR and restrict optimizations.
Tim Northover via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 9 10:54:43 PST 2016
Modified: cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c?rev=263048&r1=263047&r2=263048&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c Wed Mar 9 12:54:42 2016
@@ -1,334 +1,394 @@
-// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
+// RUN: -fallow-half-arguments-and-returns -ffp-contract=fast -S -emit-llvm -o - %s \
+// RUN: | opt -S -mem2reg \
+// RUN: | FileCheck %s
// Test new aarch64 intrinsics and types
#include <arm_neon.h>
+// CHECK-LABEL: define <8 x i8> @test_vadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
+// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
- // CHECK-LABEL: test_vadd_s8
return vadd_s8(v1, v2);
- // CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
+// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
- // CHECK-LABEL: test_vadd_s16
return vadd_s16(v1, v2);
- // CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
+// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
- // CHECK-LABEL: test_vadd_s32
return vadd_s32(v1, v2);
- // CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vadd_s64(<1 x i64> %v1, <1 x i64> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
+// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
- // CHECK-LABEL: test_vadd_s64
return vadd_s64(v1, v2);
- // CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <2 x float> @test_vadd_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
+// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vadd_f32
return vadd_f32(v1, v2);
- // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
+// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
- // CHECK-LABEL: test_vadd_u8
return vadd_u8(v1, v2);
- // CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
+// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
- // CHECK-LABEL: test_vadd_u16
return vadd_u16(v1, v2);
- // CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
+// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
- // CHECK-LABEL: test_vadd_u32
return vadd_u32(v1, v2);
- // CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vadd_u64(<1 x i64> %v1, <1 x i64> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
+// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
- // CHECK-LABEL: test_vadd_u64
return vadd_u64(v1, v2);
- // CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <16 x i8> @test_vaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
+// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
- // CHECK-LABEL: test_vaddq_s8
return vaddq_s8(v1, v2);
- // CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
- // CHECK-LABEL: test_vaddq_s16
return vaddq_s16(v1, v2);
- // CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t v1,int32x4_t v2) {
- // CHECK-LABEL: test_vaddq_s32
return vaddq_s32(v1, v2);
- // CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vaddq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
+// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
- // CHECK-LABEL: test_vaddq_s64
return vaddq_s64(v1, v2);
- // CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <4 x float> @test_vaddq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
+// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vaddq_f32
return vaddq_f32(v1, v2);
- // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vaddq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2
+// CHECK: ret <2 x double> [[ADD_I]]
float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vaddq_f64
return vaddq_f64(v1, v2);
- // CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
+// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
- // CHECK-LABEL: test_vaddq_u8
return vaddq_u8(v1, v2);
- // CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
- // CHECK-LABEL: test_vaddq_u16
return vaddq_u16(v1, v2);
- // CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
- // CHECK: vaddq_u32
return vaddq_u32(v1, v2);
- // CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vaddq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
+// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
- // CHECK-LABEL: test_vaddq_u64
return vaddq_u64(v1, v2);
- // CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vsub_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
+// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
- // CHECK-LABEL: test_vsub_s8
return vsub_s8(v1, v2);
- // CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vsub_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
+// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
- // CHECK-LABEL: test_vsub_s16
return vsub_s16(v1, v2);
- // CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vsub_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
+// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
- // CHECK-LABEL: test_vsub_s32
return vsub_s32(v1, v2);
- // CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vsub_s64(<1 x i64> %v1, <1 x i64> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
+// CHECK: ret <1 x i64> [[SUB_I]]
int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
- // CHECK-LABEL: test_vsub_s64
return vsub_s64(v1, v2);
- // CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <2 x float> @test_vsub_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
+// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vsub_f32
return vsub_f32(v1, v2);
- // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vsub_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
+// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
- // CHECK-LABEL: test_vsub_u8
return vsub_u8(v1, v2);
- // CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vsub_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
+// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
- // CHECK-LABEL: test_vsub_u16
return vsub_u16(v1, v2);
- // CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vsub_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
+// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
- // CHECK-LABEL: test_vsub_u32
return vsub_u32(v1, v2);
- // CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vsub_u64(<1 x i64> %v1, <1 x i64> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
+// CHECK: ret <1 x i64> [[SUB_I]]
uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
- // CHECK-LABEL: test_vsub_u64
return vsub_u64(v1, v2);
- // CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <16 x i8> @test_vsubq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
+// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
- // CHECK-LABEL: test_vsubq_s8
return vsubq_s8(v1, v2);
- // CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vsubq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
+// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
- // CHECK-LABEL: test_vsubq_s16
return vsubq_s16(v1, v2);
- // CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vsubq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
+// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubq_s32(int32x4_t v1,int32x4_t v2) {
- // CHECK-LABEL: test_vsubq_s32
return vsubq_s32(v1, v2);
- // CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vsubq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
+// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
- // CHECK-LABEL: test_vsubq_s64
return vsubq_s64(v1, v2);
- // CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <4 x float> @test_vsubq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
+// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vsubq_f32
return vsubq_f32(v1, v2);
- // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vsubq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
+// CHECK: ret <2 x double> [[SUB_I]]
float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vsubq_f64
return vsubq_f64(v1, v2);
- // CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vsubq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
+// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
- // CHECK-LABEL: test_vsubq_u8
return vsubq_u8(v1, v2);
- // CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vsubq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
+// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
- // CHECK-LABEL: test_vsubq_u16
return vsubq_u16(v1, v2);
- // CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vsubq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
+// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
- // CHECK: vsubq_u32
return vsubq_u32(v1, v2);
- // CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vsubq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
+// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
- // CHECK-LABEL: test_vsubq_u64
return vsubq_u64(v1, v2);
- // CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vmul_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
+// CHECK: ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
- // CHECK-LABEL: test_vmul_s8
return vmul_s8(v1, v2);
- // CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vmul_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
+// CHECK: ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
- // CHECK-LABEL: test_vmul_s16
return vmul_s16(v1, v2);
- // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vmul_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
+// CHECK: ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
- // CHECK-LABEL: test_vmul_s32
return vmul_s32(v1, v2);
- // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <2 x float> @test_vmul_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
+// CHECK: ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vmul_f32
return vmul_f32(v1, v2);
- // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vmul_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
+// CHECK: ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
- // CHECK-LABEL: test_vmul_u8
return vmul_u8(v1, v2);
- // CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vmul_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
+// CHECK: ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
- // CHECK-LABEL: test_vmul_u16
return vmul_u16(v1, v2);
- // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vmul_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
+// CHECK: ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
- // CHECK-LABEL: test_vmul_u32
return vmul_u32(v1, v2);
- // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vmulq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
+// CHECK: ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
- // CHECK-LABEL: test_vmulq_s8
return vmulq_s8(v1, v2);
- // CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vmulq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
+// CHECK: ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
- // CHECK-LABEL: test_vmulq_s16
return vmulq_s16(v1, v2);
- // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vmulq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
+// CHECK: ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
- // CHECK-LABEL: test_vmulq_s32
return vmulq_s32(v1, v2);
- // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vmulq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
+// CHECK: ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
- // CHECK-LABEL: test_vmulq_u8
return vmulq_u8(v1, v2);
- // CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vmulq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
+// CHECK: ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
- // CHECK-LABEL: test_vmulq_u16
return vmulq_u16(v1, v2);
- // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vmulq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
+// CHECK: ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
- // CHECK-LABEL: test_vmulq_u32
return vmulq_u32(v1, v2);
- // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x float> @test_vmulq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
+// CHECK: ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vmulq_f32
return vmulq_f32(v1, v2);
- // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vmulq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
+// CHECK: ret <2 x double> [[MUL_I]]
float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vmulq_f64
return vmulq_f64(v1, v2);
- // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vmul_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
+// CHECK: ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
// test_vmul_p8
return vmul_p8(v1, v2);
// pmul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <16 x i8> @test_vmulq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
+// CHECK: ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
// test_vmulq_p8
return vmulq_p8(v1, v2);
@@ -336,1295 +396,2132 @@ poly8x16_t test_vmulq_p8(poly8x16_t v1,
}
+// CHECK-LABEL: define <8 x i8> @test_vmla_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
+// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
- // CHECK-LABEL: test_vmla_s8
return vmla_s8(v1, v2, v3);
- // CHECK: mla {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <8 x i8> @test_vmla_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
- // CHECK-LABEL: test_vmla_s16
return vmla_s16(v1, v2, v3);
- // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vmla_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
+// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
- // CHECK-LABEL: test_vmla_s32
return vmla_s32(v1, v2, v3);
- // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <2 x float> @test_vmla_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
+// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
- // CHECK-LABEL: test_vmla_f32
return vmla_f32(v1, v2, v3);
- // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vmla_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
+// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
- // CHECK-LABEL: test_vmla_u8
return vmla_u8(v1, v2, v3);
- // CHECK: mla {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vmla_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
+// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
- // CHECK-LABEL: test_vmla_u16
return vmla_u16(v1, v2, v3);
- // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vmla_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
+// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
- // CHECK-LABEL: test_vmla_u32
return vmla_u32(v1, v2, v3);
- // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vmlaq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
+// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
- // CHECK-LABEL: test_vmlaq_s8
return vmlaq_s8(v1, v2, v3);
- // CHECK: mla {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vmlaq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
- // CHECK-LABEL: test_vmlaq_s16
return vmlaq_s16(v1, v2, v3);
- // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vmlaq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
- // CHECK-LABEL: test_vmlaq_s32
return vmlaq_s32(v1, v2, v3);
- // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x float> @test_vmlaq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
+// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
- // CHECK-LABEL: test_vmlaq_f32
return vmlaq_f32(v1, v2, v3);
- // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vmlaq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
+// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
- // CHECK-LABEL: test_vmlaq_u8
return vmlaq_u8(v1, v2, v3);
- // CHECK: mla {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vmlaq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
- // CHECK-LABEL: test_vmlaq_u16
return vmlaq_u16(v1, v2, v3);
- // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vmlaq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
- // CHECK-LABEL: test_vmlaq_u32
return vmlaq_u32(v1, v2, v3);
- // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vmlaq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
+// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
+// CHECK: ret <2 x double> [[ADD_I]]
float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
- // CHECK-LABEL: test_vmlaq_f64
return vmlaq_f64(v1, v2, v3);
- // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vmls_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
+// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
- // CHECK-LABEL: test_vmls_s8
return vmls_s8(v1, v2, v3);
- // CHECK: mls {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <8 x i8> @test_vmls_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
- // CHECK-LABEL: test_vmls_s16
return vmls_s16(v1, v2, v3);
- // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vmls_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
+// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
- // CHECK-LABEL: test_vmls_s32
return vmls_s32(v1, v2, v3);
- // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <2 x float> @test_vmls_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
+// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
- // CHECK-LABEL: test_vmls_f32
return vmls_f32(v1, v2, v3);
- // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vmls_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
+// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
- // CHECK-LABEL: test_vmls_u8
return vmls_u8(v1, v2, v3);
- // CHECK: mls {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vmls_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
+// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
- // CHECK-LABEL: test_vmls_u16
return vmls_u16(v1, v2, v3);
- // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vmls_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
+// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
- // CHECK-LABEL: test_vmls_u32
return vmls_u32(v1, v2, v3);
- // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vmlsq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
+// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
- // CHECK-LABEL: test_vmlsq_s8
return vmlsq_s8(v1, v2, v3);
- // CHECK: mls {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vmlsq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
+// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
- // CHECK-LABEL: test_vmlsq_s16
return vmlsq_s16(v1, v2, v3);
- // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vmlsq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
- // CHECK-LABEL: test_vmlsq_s32
return vmlsq_s32(v1, v2, v3);
- // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x float> @test_vmlsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
+// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
- // CHECK-LABEL: test_vmlsq_f32
return vmlsq_f32(v1, v2, v3);
- // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vmlsq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
+// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
- // CHECK-LABEL: test_vmlsq_u8
return vmlsq_u8(v1, v2, v3);
- // CHECK: mls {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vmlsq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
+// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
- // CHECK-LABEL: test_vmlsq_u16
return vmlsq_u16(v1, v2, v3);
- // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vmlsq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
- // CHECK-LABEL: test_vmlsq_u32
return vmlsq_u32(v1, v2, v3);
- // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vmlsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
+// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
+// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
+// CHECK: ret <2 x double> [[SUB_I]]
float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
- // CHECK-LABEL: test_vmlsq_f64
return vmlsq_f64(v1, v2, v3);
- // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x float> @test_vfma_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
+// CHECK: ret <2 x float> [[TMP6]]
float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
- // CHECK-LABEL: test_vfma_f32
return vfma_f32(v1, v2, v3);
- // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <4 x float> @test_vfmaq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
+// CHECK: ret <4 x float> [[TMP6]]
float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
- // CHECK-LABEL: test_vfmaq_f32
return vfmaq_f32(v1, v2, v3);
- // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vfmaq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4
+// CHECK: ret <2 x double> [[TMP6]]
float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
- // CHECK-LABEL: test_vfmaq_f64
return vfmaq_f64(v1, v2, v3);
- // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[TMP4:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP3]]
+// CHECK: [[FMLS_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK: [[FMLS1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[FMLS2_I:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLS_I]], <2 x float> [[TMP4]], <2 x float> [[FMLS1_I]]) #4
+// CHECK: ret <2 x float> [[FMLS2_I]]
float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
- // CHECK-LABEL: test_vfms_f32
return vfms_f32(v1, v2, v3);
- // CHECK: fmls v0.2s, {{v1.2s, v2.2s|v2.2s, v1.2s}}
}
+// CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[TMP4:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[TMP3]]
+// CHECK: [[FMLS_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK: [[FMLS1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[FMLS2_I:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLS_I]], <4 x float> [[TMP4]], <4 x float> [[FMLS1_I]]) #4
+// CHECK: ret <4 x float> [[FMLS2_I]]
float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
- // CHECK-LABEL: test_vfmsq_f32
return vfmsq_f32(v1, v2, v3);
- // CHECK: fmls v0.4s, {{v1.4s, v2.4s|v2.4s, v1.4s}}
}
+// CHECK-LABEL: define <2 x double> @test_vfmsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[TMP4:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP3]]
+// CHECK: [[FMLS_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK: [[FMLS1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[FMLS2_I:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLS_I]], <2 x double> [[TMP4]], <2 x double> [[FMLS1_I]]) #4
+// CHECK: ret <2 x double> [[FMLS2_I]]
float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
- // CHECK: vfmsq_f64
return vfmsq_f64(v1, v2, v3);
- // CHECK: fmls v0.2d, {{v1.2d, v2.2d|v2.2d, v1.2d}}
}
+// CHECK-LABEL: define <2 x double> @test_vdivq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
+// CHECK: ret <2 x double> [[DIV_I]]
float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vdivq_f64
return vdivq_f64(v1, v2);
- // CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <4 x float> @test_vdivq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
+// CHECK: ret <4 x float> [[DIV_I]]
float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vdivq_f32
return vdivq_f32(v1, v2);
- // CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x float> @test_vdiv_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
+// CHECK: ret <2 x float> [[DIV_I]]
float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vdiv_f32
return vdiv_f32(v1, v2);
- // CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vaba_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4
+// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
+// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
- // CHECK-LABEL: test_vaba_s8
return vaba_s8(v1, v2, v3);
- // CHECK: saba {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vaba_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
+// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
- // CHECK-LABEL: test_vaba_s16
return vaba_s16(v1, v2, v3);
- // CHECK: saba {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vaba_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
+// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
- // CHECK-LABEL: test_vaba_s32
return vaba_s32(v1, v2, v3);
- // CHECK: saba {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vaba_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4
+// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
+// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
- // CHECK-LABEL: test_vaba_u8
return vaba_u8(v1, v2, v3);
- // CHECK: uaba {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vaba_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
+// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
- // CHECK-LABEL: test_vaba_u16
return vaba_u16(v1, v2, v3);
- // CHECK: uaba {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vaba_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
+// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
- // CHECK-LABEL: test_vaba_u32
return vaba_u32(v1, v2, v3);
- // CHECK: uaba {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vabaq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4
+// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
+// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
- // CHECK-LABEL: test_vabaq_s8
return vabaq_s8(v1, v2, v3);
- // CHECK: saba {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vabaq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
- // CHECK-LABEL: test_vabaq_s16
return vabaq_s16(v1, v2, v3);
- // CHECK: saba {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vabaq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
- // CHECK-LABEL: test_vabaq_s32
return vabaq_s32(v1, v2, v3);
- // CHECK: saba {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vabaq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4
+// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
+// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
- // CHECK-LABEL: test_vabaq_u8
return vabaq_u8(v1, v2, v3);
- // CHECK: uaba {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vabaq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
- // CHECK-LABEL: test_vabaq_u16
return vabaq_u16(v1, v2, v3);
- // CHECK: uaba {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vabaq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
- // CHECK-LABEL: test_vabaq_u32
return vabaq_u32(v1, v2, v3);
- // CHECK: uaba {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i8> @test_vabd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
+// CHECK: ret <8 x i8> [[VABD_I]]
int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
- // CHECK-LABEL: test_vabd_s8
return vabd_s8(v1, v2);
- // CHECK: sabd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vabd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4
+// CHECK: ret <4 x i16> [[VABD2_I]]
int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
- // CHECK-LABEL: test_vabd_s16
return vabd_s16(v1, v2);
- // CHECK: sabd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vabd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4
+// CHECK: ret <2 x i32> [[VABD2_I]]
int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
- // CHECK-LABEL: test_vabd_s32
return vabd_s32(v1, v2);
- // CHECK: sabd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vabd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
+// CHECK: ret <8 x i8> [[VABD_I]]
uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
- // CHECK-LABEL: test_vabd_u8
return vabd_u8(v1, v2);
- // CHECK: uabd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vabd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4
+// CHECK: ret <4 x i16> [[VABD2_I]]
uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
- // CHECK-LABEL: test_vabd_u16
return vabd_u16(v1, v2);
- // CHECK: uabd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vabd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4
+// CHECK: ret <2 x i32> [[VABD2_I]]
uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
- // CHECK-LABEL: test_vabd_u32
return vabd_u32(v1, v2);
- // CHECK: uabd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <2 x float> @test_vabd_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]]) #4
+// CHECK: ret <2 x float> [[VABD2_I]]
float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vabd_f32
return vabd_f32(v1, v2);
- // CHECK: fabd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vabdq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
+// CHECK: ret <16 x i8> [[VABD_I]]
int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
- // CHECK-LABEL: test_vabdq_s8
return vabdq_s8(v1, v2);
- // CHECK: sabd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vabdq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4
+// CHECK: ret <8 x i16> [[VABD2_I]]
int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
- // CHECK-LABEL: test_vabdq_s16
return vabdq_s16(v1, v2);
- // CHECK: sabd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vabdq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4
+// CHECK: ret <4 x i32> [[VABD2_I]]
int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
- // CHECK-LABEL: test_vabdq_s32
return vabdq_s32(v1, v2);
- // CHECK: sabd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vabdq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
+// CHECK: ret <16 x i8> [[VABD_I]]
uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
- // CHECK-LABEL: test_vabdq_u8
return vabdq_u8(v1, v2);
- // CHECK: uabd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vabdq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4
+// CHECK: ret <8 x i16> [[VABD2_I]]
uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
- // CHECK-LABEL: test_vabdq_u16
return vabdq_u16(v1, v2);
- // CHECK: uabd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vabdq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4
+// CHECK: ret <4 x i32> [[VABD2_I]]
uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
- // CHECK-LABEL: test_vabdq_u32
return vabdq_u32(v1, v2);
- // CHECK: uabd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x float> @test_vabdq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> [[VABD_I]], <4 x float> [[VABD1_I]]) #4
+// CHECK: ret <4 x float> [[VABD2_I]]
float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vabdq_f32
return vabdq_f32(v1, v2);
- // CHECK: fabd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vabdq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> [[VABD_I]], <2 x double> [[VABD1_I]]) #4
+// CHECK: ret <2 x double> [[VABD2_I]]
float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vabdq_f64
return vabdq_f64(v1, v2);
- // CHECK: fabd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
+// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
+// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: ret <8 x i8> [[VBSL2_I]]
int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
- // CHECK-LABEL: test_vbsl_s8
return vbsl_s8(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[TMP4]]
int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
- // CHECK-LABEL: test_vbsl_s16
return vbsl_s16(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <2 x i32> [[VBSL5_I]]
int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
- // CHECK-LABEL: test_vbsl_s32
return vbsl_s32(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <1 x i64> [[VBSL5_I]]
uint64x1_t test_vbsl_s64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
- // CHECK-LABEL: test_vbsl_s64
return vbsl_s64(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
+// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
+// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: ret <8 x i8> [[VBSL2_I]]
uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
- // CHECK-LABEL: test_vbsl_u8
return vbsl_u8(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <4 x i16> [[VBSL5_I]]
uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
- // CHECK-LABEL: test_vbsl_u16
return vbsl_u16(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <2 x i32> [[VBSL5_I]]
uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
- // CHECK-LABEL: test_vbsl_u32
return vbsl_u32(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <1 x i64> [[VBSL5_I]]
uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
- // CHECK-LABEL: test_vbsl_u64
return vbsl_u64(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <2 x i32>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP4:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
+// CHECK: ret <2 x float> [[TMP5]]
float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
- // CHECK-LABEL: test_vbsl_f32
return vbsl_f32(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
+// CHECK: ret <1 x double> [[TMP4]]
float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
- // CHECK-LABEL: test_vbsl_f64
return vbsl_f64(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
+// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
+// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
+// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: ret <8 x i8> [[VBSL2_I]]
poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
- // CHECK-LABEL: test_vbsl_p8
return vbsl_p8(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <4 x i16> [[VBSL5_I]]
poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
- // CHECK-LABEL: test_vbsl_p16
return vbsl_p16(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
+// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
+// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: ret <16 x i8> [[VBSL2_I]]
int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
- // CHECK-LABEL: test_vbslq_s8
return vbslq_s8(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <8 x i16> [[VBSL5_I]]
int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
- // CHECK-LABEL: test_vbslq_s16
return vbslq_s16(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <4 x i32> [[VBSL5_I]]
int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
- // CHECK-LABEL: test_vbslq_s32
return vbslq_s32(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <2 x i64> [[VBSL5_I]]
int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
- // CHECK-LABEL: test_vbslq_s64
return vbslq_s64(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
+// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
+// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: ret <16 x i8> [[VBSL2_I]]
uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
- // CHECK-LABEL: test_vbslq_u8
return vbslq_u8(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <8 x i16> [[VBSL5_I]]
uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
- // CHECK-LABEL: test_vbslq_u16
return vbslq_u16(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vbslq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <4 x i32> [[VBSL5_I]]
int32x4_t test_vbslq_u32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
- // CHECK-LABEL: test_vbslq_u32
return vbslq_s32(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <2 x i64> [[VBSL5_I]]
uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
- // CHECK-LABEL: test_vbslq_u64
return vbslq_u64(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
+// CHECK: ret <4 x float> [[TMP4]]
float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
- // CHECK-LABEL: test_vbslq_f32
return vbslq_f32(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
+// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
+// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
+// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: ret <16 x i8> [[VBSL2_I]]
poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
- // CHECK-LABEL: test_vbslq_p8
return vbslq_p8(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: ret <8 x i16> [[VBSL5_I]]
poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
- // CHECK-LABEL: test_vbslq_p16
return vbslq_p16(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
+// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
+// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
+// CHECK: ret <2 x double> [[TMP4]]
float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
- // CHECK-LABEL: test_vbslq_f64
return vbslq_f64(v1, v2, v3);
- // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <2 x float> @test_vrecps_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// CHECK: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> [[VRECPS_V_I]], <2 x float> [[VRECPS_V1_I]]) #4
+// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <2 x float>
+// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vrecps_f32
return vrecps_f32(v1, v2);
- // CHECK: frecps {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <4 x float> @test_vrecpsq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> [[VRECPSQ_V_I]], <4 x float> [[VRECPSQ_V1_I]]) #4
+// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <4 x float>
+// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vrecpsq_f32
return vrecpsq_f32(v1, v2);
- // CHECK: frecps {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vrecpsq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> [[VRECPSQ_V_I]], <2 x double> [[VRECPSQ_V1_I]]) #4
+// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <2 x double>
+// CHECK: ret <2 x double> [[TMP2]]
float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vrecpsq_f64
return vrecpsq_f64(v1, v2);
- // CHECK: frecps {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x float> @test_vrsqrts_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// CHECK: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> [[VRSQRTS_V_I]], <2 x float> [[VRSQRTS_V1_I]]) #4
+// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <2 x float>
+// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vrsqrts_f32
return vrsqrts_f32(v1, v2);
- // CHECK: frsqrts {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <4 x float> @test_vrsqrtsq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// CHECK: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> [[VRSQRTSQ_V_I]], <4 x float> [[VRSQRTSQ_V1_I]]) #4
+// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <4 x float>
+// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vrsqrtsq_f32
return vrsqrtsq_f32(v1, v2);
- // CHECK: frsqrts {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vrsqrtsq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// CHECK: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> [[VRSQRTSQ_V_I]], <2 x double> [[VRSQRTSQ_V1_I]]) #4
+// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <2 x double>
+// CHECK: ret <2 x double> [[TMP2]]
float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vrsqrtsq_f64
return vrsqrtsq_f64(v1, v2);
- // CHECK: frsqrts {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x i32> @test_vcage_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCAGE_V_I]], <2 x float> [[VCAGE_V1_I]]) #4
+// CHECK: ret <2 x i32> [[VCAGE_V2_I]]
uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vcage_f32
return vcage_f32(v1, v2);
- // CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+// CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCAGE_V_I]], <1 x double> [[VCAGE_V1_I]]) #4
+// CHECK: ret <1 x i64> [[VCAGE_V2_I]]
uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) {
- // CHECK-LABEL: test_vcage_f64
return vcage_f64(a, b);
- // CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <4 x i32> @test_vcageq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCAGEQ_V_I]], <4 x float> [[VCAGEQ_V1_I]]) #4
+// CHECK: ret <4 x i32> [[VCAGEQ_V2_I]]
uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vcageq_f32
return vcageq_f32(v1, v2);
- // CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vcageq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCAGEQ_V_I]], <2 x double> [[VCAGEQ_V1_I]]) #4
+// CHECK: ret <2 x i64> [[VCAGEQ_V2_I]]
uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vcageq_f64
return vcageq_f64(v1, v2);
- // CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x i32> @test_vcagt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCAGT_V_I]], <2 x float> [[VCAGT_V1_I]]) #4
+// CHECK: ret <2 x i32> [[VCAGT_V2_I]]
uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vcagt_f32
return vcagt_f32(v1, v2);
- // CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+// CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCAGT_V_I]], <1 x double> [[VCAGT_V1_I]]) #4
+// CHECK: ret <1 x i64> [[VCAGT_V2_I]]
uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) {
- // CHECK-LABEL: test_vcagt_f64
return vcagt_f64(a, b);
- // CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <4 x i32> @test_vcagtq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCAGTQ_V_I]], <4 x float> [[VCAGTQ_V1_I]]) #4
+// CHECK: ret <4 x i32> [[VCAGTQ_V2_I]]
uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vcagtq_f32
return vcagtq_f32(v1, v2);
- // CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vcagtq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCAGTQ_V_I]], <2 x double> [[VCAGTQ_V1_I]]) #4
+// CHECK: ret <2 x i64> [[VCAGTQ_V2_I]]
uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vcagtq_f64
return vcagtq_f64(v1, v2);
- // CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x i32> @test_vcale_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCALE_V_I]], <2 x float> [[VCALE_V1_I]]) #4
+// CHECK: ret <2 x i32> [[VCALE_V2_I]]
uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vcale_f32
return vcale_f32(v1, v2);
// Using registers other than v0, v1 are possible, but would be odd.
- // CHECK: facge {{v[0-9]+}}.2s, v1.2s, v0.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+// CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCALE_V_I]], <1 x double> [[VCALE_V1_I]]) #4
+// CHECK: ret <1 x i64> [[VCALE_V2_I]]
uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) {
- // CHECK-LABEL: test_vcale_f64
return vcale_f64(a, b);
- // CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <4 x i32> @test_vcaleq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCALEQ_V_I]], <4 x float> [[VCALEQ_V1_I]]) #4
+// CHECK: ret <4 x i32> [[VCALEQ_V2_I]]
uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vcaleq_f32
return vcaleq_f32(v1, v2);
// Using registers other than v0, v1 are possible, but would be odd.
- // CHECK: facge {{v[0-9]+}}.4s, v1.4s, v0.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vcaleq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCALEQ_V_I]], <2 x double> [[VCALEQ_V1_I]]) #4
+// CHECK: ret <2 x i64> [[VCALEQ_V2_I]]
uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vcaleq_f64
return vcaleq_f64(v1, v2);
// Using registers other than v0, v1 are possible, but would be odd.
- // CHECK: facge {{v[0-9]+}}.2d, v1.2d, v0.2d
}
+// CHECK-LABEL: define <2 x i32> @test_vcalt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCALT_V_I]], <2 x float> [[VCALT_V1_I]]) #4
+// CHECK: ret <2 x i32> [[VCALT_V2_I]]
uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vcalt_f32
return vcalt_f32(v1, v2);
// Using registers other than v0, v1 are possible, but would be odd.
- // CHECK: facgt {{v[0-9]+}}.2s, v1.2s, v0.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+// CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCALT_V_I]], <1 x double> [[VCALT_V1_I]]) #4
+// CHECK: ret <1 x i64> [[VCALT_V2_I]]
uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) {
- // CHECK-LABEL: test_vcalt_f64
return vcalt_f64(a, b);
- // CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <4 x i32> @test_vcaltq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCALTQ_V_I]], <4 x float> [[VCALTQ_V1_I]]) #4
+// CHECK: ret <4 x i32> [[VCALTQ_V2_I]]
uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vcaltq_f32
return vcaltq_f32(v1, v2);
// Using registers other than v0, v1 are possible, but would be odd.
- // CHECK: facgt {{v[0-9]+}}.4s, v1.4s, v0.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vcaltq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCALTQ_V_I]], <2 x double> [[VCALTQ_V1_I]]) #4
+// CHECK: ret <2 x i64> [[VCALTQ_V2_I]]
uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vcaltq_f64
return vcaltq_f64(v1, v2);
// Using registers other than v0, v1 are possible, but would be odd.
- // CHECK: facgt {{v[0-9]+}}.2d, v1.2d, v0.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vtst_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
+// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
- // CHECK-LABEL: test_vtst_s8
return vtst_s8(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vtst_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
+// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
- // CHECK-LABEL: test_vtst_s16
return vtst_s16(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vtst_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
+// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
- // CHECK-LABEL: test_vtst_s32
return vtst_s32(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vtst_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
+// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
- // CHECK-LABEL: test_vtst_u8
return vtst_u8(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vtst_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
+// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
- // CHECK-LABEL: test_vtst_u16
return vtst_u16(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vtst_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
+// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
- // CHECK-LABEL: test_vtst_u32
return vtst_u32(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vtstq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
+// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
- // CHECK-LABEL: test_vtstq_s8
return vtstq_s8(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vtstq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
- // CHECK-LABEL: test_vtstq_s16
return vtstq_s16(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vtstq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
+// CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
- // CHECK-LABEL: test_vtstq_s32
return vtstq_s32(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vtstq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
+// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
- // CHECK-LABEL: test_vtstq_u8
return vtstq_u8(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vtstq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
- // CHECK-LABEL: test_vtstq_u16
return vtstq_u16(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vtstq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
+// CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
- // CHECK-LABEL: test_vtstq_u32
return vtstq_u32(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vtstq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
+// CHECK: ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
- // CHECK-LABEL: test_vtstq_s64
return vtstq_s64(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x i64> @test_vtstq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
+// CHECK: ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
- // CHECK-LABEL: test_vtstq_u64
return vtstq_u64(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vtst_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
+// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
- // CHECK-LABEL: test_vtst_p8
return vtst_p8(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vtst_p16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
+// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) {
- // CHECK-LABEL: test_vtst_p16
return vtst_p16(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <16 x i8> @test_vtstq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
+// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
- // CHECK-LABEL: test_vtstq_p8
return vtstq_p8(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vtstq_p16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) {
- // CHECK-LABEL: test_vtstq_p16
return vtstq_p16(v1, v2);
- // CHECK: cmtst {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <1 x i64> @test_vtst_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
+// CHECK: ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
- // CHECK-LABEL: test_vtst_s64
return vtst_s64(a, b);
- // CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <1 x i64> @test_vtst_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
+// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
+// CHECK: ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
- // CHECK-LABEL: test_vtst_u64
return vtst_u64(a, b);
- // CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <8 x i8> @test_vceq_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
- // CHECK-LABEL: test_vceq_s8
return vceq_s8(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vceq_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
- // CHECK-LABEL: test_vceq_s16
return vceq_s16(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vceq_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
- // CHECK-LABEL: test_vceq_s32
return vceq_s32(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
- // CHECK-LABEL: test_vceq_s64
return vceq_s64(a, b);
- // CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
- // CHECK-LABEL: test_vceq_u64
return vceq_u64(a, b);
- // CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <2 x i32> @test_vceq_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vceq_f32
return vceq_f32(v1, v2);
- // CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
- // CHECK-LABEL: test_vceq_f64
return vceq_f64(a, b);
- // CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <8 x i8> @test_vceq_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
- // CHECK-LABEL: test_vceq_u8
return vceq_u8(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vceq_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
- // CHECK-LABEL: test_vceq_u16
return vceq_u16(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vceq_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
- // CHECK-LABEL: test_vceq_u32
return vceq_u32(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vceq_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
- // CHECK-LABEL: test_vceq_p8
return vceq_p8(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <16 x i8> @test_vceqq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
- // CHECK-LABEL: test_vceqq_s8
return vceqq_s8(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vceqq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
- // CHECK-LABEL: test_vceqq_s16
return vceqq_s16(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vceqq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
- // CHECK-LABEL: test_vceqq_s32
return vceqq_s32(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vceqq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vceqq_f32
return vceqq_f32(v1, v2);
- // CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vceqq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
- // CHECK-LABEL: test_vceqq_u8
return vceqq_u8(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vceqq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
- // CHECK-LABEL: test_vceqq_u16
return vceqq_u16(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vceqq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
- // CHECK-LABEL: test_vceqq_u32
return vceqq_u32(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vceqq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
- // CHECK-LABEL: test_vceqq_p8
return vceqq_p8(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <2 x i64> @test_vceqq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
- // CHECK-LABEL: test_vceqq_s64
return vceqq_s64(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x i64> @test_vceqq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
- // CHECK-LABEL: test_vceqq_u64
return vceqq_u64(v1, v2);
- // CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x i64> @test_vceqq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vceqq_f64
return vceqq_f64(v1, v2);
- // CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vcge_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
-// CHECK-LABEL: test_vcge_s8
return vcge_s8(v1, v2);
-// CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vcge_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
-// CHECK-LABEL: test_vcge_s16
return vcge_s16(v1, v2);
-// CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vcge_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
-// CHECK-LABEL: test_vcge_s32
return vcge_s32(v1, v2);
-// CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) {
- // CHECK-LABEL: test_vcge_s64
return vcge_s64(a, b);
- // CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) {
- // CHECK-LABEL: test_vcge_u64
return vcge_u64(a, b);
- // CHECK: cmhs {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <2 x i32> @test_vcge_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
-// CHECK-LABEL: test_vcge_f32
return vcge_f32(v1, v2);
-// CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
- // CHECK-LABEL: test_vcge_f64
return vcge_f64(a, b);
- // CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <8 x i8> @test_vcge_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
-// CHECK-LABEL: test_vcge_u8
return vcge_u8(v1, v2);
-// CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vcge_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
-// CHECK-LABEL: test_vcge_u16
return vcge_u16(v1, v2);
-// CHECK: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vcge_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
-// CHECK-LABEL: test_vcge_u32
return vcge_u32(v1, v2);
-// CHECK: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vcgeq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
-// CHECK-LABEL: test_vcgeq_s8
return vcgeq_s8(v1, v2);
-// CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vcgeq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
-// CHECK-LABEL: test_vcgeq_s16
return vcgeq_s16(v1, v2);
-// CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vcgeq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
-// CHECK-LABEL: test_vcgeq_s32
return vcgeq_s32(v1, v2);
-// CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vcgeq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
-// CHECK-LABEL: test_vcgeq_f32
return vcgeq_f32(v1, v2);
-// CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vcgeq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
-// CHECK-LABEL: test_vcgeq_u8
return vcgeq_u8(v1, v2);
-// CHECK: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vcgeq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
-// CHECK-LABEL: test_vcgeq_u16
return vcgeq_u16(v1, v2);
-// CHECK: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vcgeq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
-// CHECK-LABEL: test_vcgeq_u32
return vcgeq_u32(v1, v2);
-// CHECK: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vcgeq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
-// CHECK-LABEL: test_vcgeq_s64
return vcgeq_s64(v1, v2);
-// CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x i64> @test_vcgeq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
-// CHECK-LABEL: test_vcgeq_u64
return vcgeq_u64(v1, v2);
-// CHECK: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x i64> @test_vcgeq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
-// CHECK-LABEL: test_vcgeq_f64
return vcgeq_f64(v1, v2);
-// CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
// Notes about vcle:
// LE condition predicate implemented as GE, so check reversed operands.
// Using registers other than v0, v1 are possible, but would be odd.
+// CHECK-LABEL: define <8 x i8> @test_vcle_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
- // CHECK-LABEL: test_vcle_s8
return vcle_s8(v1, v2);
- // CHECK: cmge {{v[0-9]+}}.8b, v1.8b, v0.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vcle_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
- // CHECK-LABEL: test_vcle_s16
return vcle_s16(v1, v2);
- // CHECK: cmge {{v[0-9]+}}.4h, v1.4h, v0.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vcle_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
- // CHECK-LABEL: test_vcle_s32
return vcle_s32(v1, v2);
- // CHECK: cmge {{v[0-9]+}}.2s, v1.2s, v0.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
- // CHECK-LABEL: test_vcle_s64
return vcle_s64(a, b);
- // CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
- // CHECK-LABEL: test_vcle_u64
return vcle_u64(a, b);
- // CHECK: cmhs {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <2 x i32> @test_vcle_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vcle_f32
return vcle_f32(v1, v2);
- // CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
- // CHECK-LABEL: test_vcle_f64
return vcle_f64(a, b);
- // CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <8 x i8> @test_vcle_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
- // CHECK-LABEL: test_vcle_u8
return vcle_u8(v1, v2);
- // CHECK: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vcle_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
- // CHECK-LABEL: test_vcle_u16
return vcle_u16(v1, v2);
- // CHECK: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vcle_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
- // CHECK-LABEL: test_vcle_u32
return vcle_u32(v1, v2);
- // CHECK: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vcleq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
- // CHECK-LABEL: test_vcleq_s8
return vcleq_s8(v1, v2);
- // CHECK: cmge {{v[0-9]+}}.16b, v1.16b, v0.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vcleq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
- // CHECK-LABEL: test_vcleq_s16
return vcleq_s16(v1, v2);
- // CHECK: cmge {{v[0-9]+}}.8h, v1.8h, v0.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vcleq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
- // CHECK-LABEL: test_vcleq_s32
return vcleq_s32(v1, v2);
- // CHECK: cmge {{v[0-9]+}}.4s, v1.4s, v0.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vcleq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vcleq_f32
return vcleq_f32(v1, v2);
- // CHECK: fcmge {{v[0-9]+}}.4s, v1.4s, v0.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vcleq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
- // CHECK-LABEL: test_vcleq_u8
return vcleq_u8(v1, v2);
- // CHECK: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vcleq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
- // CHECK-LABEL: test_vcleq_u16
return vcleq_u16(v1, v2);
- // CHECK: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vcleq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
- // CHECK-LABEL: test_vcleq_u32
return vcleq_u32(v1, v2);
- // CHECK: cmhs {{v[0-9]+}}.4s, v1.4s, v0.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vcleq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
- // CHECK-LABEL: test_vcleq_s64
return vcleq_s64(v1, v2);
- // CHECK: cmge {{v[0-9]+}}.2d, v1.2d, v0.2d
}
+// CHECK-LABEL: define <2 x i64> @test_vcleq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
- // CHECK-LABEL: test_vcleq_u64
return vcleq_u64(v1, v2);
- // CHECK: cmhs {{v[0-9]+}}.2d, v1.2d, v0.2d
}
+// CHECK-LABEL: define <2 x i64> @test_vcleq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vcleq_f64
return vcleq_f64(v1, v2);
- // CHECK: fcmge {{v[0-9]+}}.2d, v1.2d, v0.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vcgt_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
- // CHECK-LABEL: test_vcgt_s8
return vcgt_s8(v1, v2);
- // CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vcgt_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
- // CHECK-LABEL: test_vcgt_s16
return vcgt_s16(v1, v2);
- // CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vcgt_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
- // CHECK-LABEL: test_vcgt_s32
return vcgt_s32(v1, v2);
- // CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
- // CHECK-LABEL: test_vcgt_s64
return vcgt_s64(a, b);
- // CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
- // CHECK-LABEL: test_vcgt_u64
return vcgt_u64(a, b);
- // CHECK: cmhi {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <2 x i32> @test_vcgt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vcgt_f32
return vcgt_f32(v1, v2);
- // CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
- // CHECK-LABEL: test_vcgt_f64
return vcgt_f64(a, b);
- // CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <8 x i8> @test_vcgt_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
- // CHECK-LABEL: test_vcgt_u8
return vcgt_u8(v1, v2);
- // CHECK: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vcgt_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
- // CHECK-LABEL: test_vcgt_u16
return vcgt_u16(v1, v2);
- // CHECK: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vcgt_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
- // CHECK-LABEL: test_vcgt_u32
return vcgt_u32(v1, v2);
- // CHECK: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vcgtq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
- // CHECK-LABEL: test_vcgtq_s8
return vcgtq_s8(v1, v2);
- // CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vcgtq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
- // CHECK-LABEL: test_vcgtq_s16
return vcgtq_s16(v1, v2);
- // CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vcgtq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
- // CHECK-LABEL: test_vcgtq_s32
return vcgtq_s32(v1, v2);
- // CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vcgtq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vcgtq_f32
return vcgtq_f32(v1, v2);
- // CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vcgtq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
- // CHECK-LABEL: test_vcgtq_u8
return vcgtq_u8(v1, v2);
- // CHECK: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vcgtq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
- // CHECK-LABEL: test_vcgtq_u16
return vcgtq_u16(v1, v2);
- // CHECK: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vcgtq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
- // CHECK-LABEL: test_vcgtq_u32
return vcgtq_u32(v1, v2);
- // CHECK: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vcgtq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
- // CHECK-LABEL: test_vcgtq_s64
return vcgtq_s64(v1, v2);
- // CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x i64> @test_vcgtq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
- // CHECK-LABEL: test_vcgtq_u64
return vcgtq_u64(v1, v2);
- // CHECK: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x i64> @test_vcgtq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vcgtq_f64
return vcgtq_f64(v1, v2);
- // CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
@@ -1632,10112 +2529,20029 @@ uint64x2_t test_vcgtq_f64(float64x2_t v1
// LT condition predicate implemented as GT, so check reversed operands.
// Using registers other than v0, v1 are possible, but would be odd.
+// CHECK-LABEL: define <8 x i8> @test_vclt_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
- // CHECK-LABEL: test_vclt_s8
return vclt_s8(v1, v2);
- // CHECK: cmgt {{v[0-9]+}}.8b, v1.8b, v0.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vclt_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
- // CHECK-LABEL: test_vclt_s16
return vclt_s16(v1, v2);
- // CHECK: cmgt {{v[0-9]+}}.4h, v1.4h, v0.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vclt_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
- // CHECK-LABEL: test_vclt_s32
return vclt_s32(v1, v2);
- // CHECK: cmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vclt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
- // CHECK-LABEL: test_vclt_s64
return vclt_s64(a, b);
- // CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
- // CHECK-LABEL: test_vclt_u64
return vclt_u64(a, b);
- // CHECK: cmhi {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <2 x i32> @test_vclt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
- // CHECK-LABEL: test_vclt_f32
return vclt_f32(v1, v2);
- // CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
+// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
- // CHECK-LABEL: test_vclt_f64
return vclt_f64(a, b);
- // CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <8 x i8> @test_vclt_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
- // CHECK-LABEL: test_vclt_u8
return vclt_u8(v1, v2);
- // CHECK: cmhi {{v[0-9]+}}.8b, v1.8b, v0.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vclt_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
- // CHECK-LABEL: test_vclt_u16
return vclt_u16(v1, v2);
- // CHECK: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vclt_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
- // CHECK-LABEL: test_vclt_u32
return vclt_u32(v1, v2);
- // CHECK: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vcltq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
- // CHECK-LABEL: test_vcltq_s8
return vcltq_s8(v1, v2);
- // CHECK: cmgt {{v[0-9]+}}.16b, v1.16b, v0.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vcltq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
- // CHECK-LABEL: test_vcltq_s16
return vcltq_s16(v1, v2);
- // CHECK: cmgt {{v[0-9]+}}.8h, v1.8h, v0.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vcltq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
- // CHECK-LABEL: test_vcltq_s32
return vcltq_s32(v1, v2);
- // CHECK: cmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vcltq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
- // CHECK-LABEL: test_vcltq_f32
return vcltq_f32(v1, v2);
- // CHECK: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vcltq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
- // CHECK-LABEL: test_vcltq_u8
return vcltq_u8(v1, v2);
- // CHECK: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vcltq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
- // CHECK-LABEL: test_vcltq_u16
return vcltq_u16(v1, v2);
- // CHECK: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vcltq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
- // CHECK-LABEL: test_vcltq_u32
return vcltq_u32(v1, v2);
- // CHECK: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vcltq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
- // CHECK-LABEL: test_vcltq_s64
return vcltq_s64(v1, v2);
- // CHECK: cmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
}
+// CHECK-LABEL: define <2 x i64> @test_vcltq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
- // CHECK-LABEL: test_vcltq_u64
return vcltq_u64(v1, v2);
- // CHECK: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d
}
+// CHECK-LABEL: define <2 x i64> @test_vcltq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
+// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
+// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
- // CHECK-LABEL: test_vcltq_f64
return vcltq_f64(v1, v2);
- // CHECK: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vhadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
+// CHECK: ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
-// CHECK-LABEL: test_vhadd_s8
return vhadd_s8(v1, v2);
- // CHECK: shadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vhadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
+// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
-// CHECK-LABEL: test_vhadd_s16
return vhadd_s16(v1, v2);
- // CHECK: shadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vhadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
+// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
-// CHECK-LABEL: test_vhadd_s32
return vhadd_s32(v1, v2);
- // CHECK: shadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vhadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
+// CHECK: ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
-// CHECK-LABEL: test_vhadd_u8
return vhadd_u8(v1, v2);
- // CHECK: uhadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vhadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
+// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
-// CHECK-LABEL: test_vhadd_u16
return vhadd_u16(v1, v2);
- // CHECK: uhadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vhadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
+// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
-// CHECK-LABEL: test_vhadd_u32
return vhadd_u32(v1, v2);
- // CHECK: uhadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vhaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
+// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
-// CHECK-LABEL: test_vhaddq_s8
return vhaddq_s8(v1, v2);
- // CHECK: shadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vhaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
+// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
-// CHECK-LABEL: test_vhaddq_s16
return vhaddq_s16(v1, v2);
- // CHECK: shadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vhaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
+// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
-// CHECK-LABEL: test_vhaddq_s32
return vhaddq_s32(v1, v2);
- // CHECK: shadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vhaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
+// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
-// CHECK-LABEL: test_vhaddq_u8
return vhaddq_u8(v1, v2);
- // CHECK: uhadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vhaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
+// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
-// CHECK-LABEL: test_vhaddq_u16
return vhaddq_u16(v1, v2);
- // CHECK: uhadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vhaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
+// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
-// CHECK-LABEL: test_vhaddq_u32
return vhaddq_u32(v1, v2);
- // CHECK: uhadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i8> @test_vhsub_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
+// CHECK: ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
-// CHECK-LABEL: test_vhsub_s8
return vhsub_s8(v1, v2);
- // CHECK: shsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vhsub_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
+// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
-// CHECK-LABEL: test_vhsub_s16
return vhsub_s16(v1, v2);
- // CHECK: shsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vhsub_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
+// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
-// CHECK-LABEL: test_vhsub_s32
return vhsub_s32(v1, v2);
- // CHECK: shsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vhsub_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
+// CHECK: ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
-// CHECK-LABEL: test_vhsub_u8
return vhsub_u8(v1, v2);
- // CHECK: uhsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vhsub_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
+// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
-// CHECK-LABEL: test_vhsub_u16
return vhsub_u16(v1, v2);
- // CHECK: uhsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vhsub_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
+// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
-// CHECK-LABEL: test_vhsub_u32
return vhsub_u32(v1, v2);
- // CHECK: uhsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vhsubq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
+// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
-// CHECK-LABEL: test_vhsubq_s8
return vhsubq_s8(v1, v2);
- // CHECK: shsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vhsubq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
+// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
-// CHECK-LABEL: test_vhsubq_s16
return vhsubq_s16(v1, v2);
- // CHECK: shsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vhsubq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
+// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
-// CHECK-LABEL: test_vhsubq_s32
return vhsubq_s32(v1, v2);
- // CHECK: shsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vhsubq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
+// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
-// CHECK-LABEL: test_vhsubq_u8
return vhsubq_u8(v1, v2);
- // CHECK: uhsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vhsubq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
+// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
-// CHECK-LABEL: test_vhsubq_u16
return vhsubq_u16(v1, v2);
- // CHECK: uhsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vhsubq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
+// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
-// CHECK-LABEL: test_vhsubq_u32
return vhsubq_u32(v1, v2);
- // CHECK: uhsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i8> @test_vrhadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
+// CHECK: ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
-// CHECK-LABEL: test_vrhadd_s8
return vrhadd_s8(v1, v2);
-// CHECK: srhadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vrhadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
+// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
-// CHECK-LABEL: test_vrhadd_s16
return vrhadd_s16(v1, v2);
-// CHECK: srhadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vrhadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
+// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) {
-// CHECK-LABEL: test_vrhadd_s32
return vrhadd_s32(v1, v2);
-// CHECK: srhadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vrhadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
+// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
+// CHECK: ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) {
-// CHECK-LABEL: test_vrhadd_u8
return vrhadd_u8(v1, v2);
-// CHECK: urhadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vrhadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
+// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) {
-// CHECK-LABEL: test_vrhadd_u16
return vrhadd_u16(v1, v2);
-// CHECK: urhadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vrhadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
+// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) {
-// CHECK-LABEL: test_vrhadd_u32
return vrhadd_u32(v1, v2);
-// CHECK: urhadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vrhaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
+// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
-// CHECK-LABEL: test_vrhaddq_s8
return vrhaddq_s8(v1, v2);
-// CHECK: srhadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vrhaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
+// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
-// CHECK-LABEL: test_vrhaddq_s16
return vrhaddq_s16(v1, v2);
-// CHECK: srhadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vrhaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
+// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
-// CHECK-LABEL: test_vrhaddq_s32
return vrhaddq_s32(v1, v2);
-// CHECK: srhadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vrhaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
+// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
+// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
-// CHECK-LABEL: test_vrhaddq_u8
return vrhaddq_u8(v1, v2);
-// CHECK: urhadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vrhaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
+// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
-// CHECK-LABEL: test_vrhaddq_u16
return vrhaddq_u16(v1, v2);
-// CHECK: urhadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vrhaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
+// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
-// CHECK-LABEL: test_vrhaddq_u32
return vrhaddq_u32(v1, v2);
-// CHECK: urhadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i8> @test_vqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vqadd_s8
return vqadd_s8(a, b);
- // CHECK: sqadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vqadd_s16
return vqadd_s16(a, b);
- // CHECK: sqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vqadd_s32
return vqadd_s32(a, b);
- // CHECK: sqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
-// CHECK-LABEL: test_vqadd_s64
return vqadd_s64(a, b);
-// CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <8 x i8> @test_vqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
-// CHECK-LABEL: test_vqadd_u8
return vqadd_u8(a, b);
- // CHECK: uqadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
-// CHECK-LABEL: test_vqadd_u16
return vqadd_u16(a, b);
- // CHECK: uqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
-// CHECK-LABEL: test_vqadd_u32
return vqadd_u32(a, b);
- // CHECK: uqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
+// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
-// CHECK: test_vqadd_u64
return vqadd_u64(a, b);
-// CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <16 x i8> @test_vqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
-// CHECK-LABEL: test_vqaddq_s8
return vqaddq_s8(a, b);
- // CHECK: sqadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vqaddq_s16
return vqaddq_s16(a, b);
- // CHECK: sqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vqaddq_s32
return vqaddq_s32(a, b);
- // CHECK: sqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
-// CHECK-LABEL: test_vqaddq_s64
return vqaddq_s64(a, b);
-// CHECK: sqadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
-// CHECK-LABEL: test_vqaddq_u8
return vqaddq_u8(a, b);
- // CHECK: uqadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
-// CHECK-LABEL: test_vqaddq_u16
return vqaddq_u16(a, b);
- // CHECK: uqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
-// CHECK-LABEL: test_vqaddq_u32
return vqaddq_u32(a, b);
- // CHECK: uqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
+// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
-// CHECK-LABEL: test_vqaddq_u64
return vqaddq_u64(a, b);
-// CHECK: uqadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vqsub_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vqsub_s8
return vqsub_s8(a, b);
- // CHECK: sqsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vqsub_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vqsub_s16
return vqsub_s16(a, b);
- // CHECK: sqsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vqsub_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vqsub_s32
return vqsub_s32(a, b);
- // CHECK: sqsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vqsub_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
-// CHECK-LABEL: test_vqsub_s64
return vqsub_s64(a, b);
-// CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <8 x i8> @test_vqsub_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
-// CHECK-LABEL: test_vqsub_u8
return vqsub_u8(a, b);
- // CHECK: uqsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vqsub_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
-// CHECK-LABEL: test_vqsub_u16
return vqsub_u16(a, b);
- // CHECK: uqsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vqsub_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
-// CHECK-LABEL: test_vqsub_u32
return vqsub_u32(a, b);
- // CHECK: uqsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vqsub_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
+// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
-// CHECK-LABEL: test_vqsub_u64
return vqsub_u64(a, b);
-// CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <16 x i8> @test_vqsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
-// CHECK-LABEL: test_vqsubq_s8
return vqsubq_s8(a, b);
- // CHECK: sqsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vqsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
+// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vqsubq_s16
return vqsubq_s16(a, b);
- // CHECK: sqsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vqsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
+// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vqsubq_s32
return vqsubq_s32(a, b);
- // CHECK: sqsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vqsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4
+// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
-// CHECK-LABEL: test_vqsubq_s64
return vqsubq_s64(a, b);
-// CHECK: sqsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vqsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
-// CHECK-LABEL: test_vqsubq_u8
return vqsubq_u8(a, b);
- // CHECK: uqsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vqsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
+// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
-// CHECK-LABEL: test_vqsubq_u16
return vqsubq_u16(a, b);
- // CHECK: uqsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vqsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
+// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
-// CHECK-LABEL: test_vqsubq_u32
return vqsubq_u32(a, b);
- // CHECK: uqsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vqsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4
+// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
-// CHECK-LABEL: test_vqsubq_u64
return vqsubq_u64(a, b);
- // CHECK: uqsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vshl_s8
return vshl_s8(a, b);
-// CHECK: sshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4
+// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vshl_s16
return vshl_s16(a, b);
-// CHECK: sshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4
+// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vshl_s32
return vshl_s32(a, b);
-// CHECK: sshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4
+// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
-// CHECK-LABEL: test_vshl_s64
return vshl_s64(a, b);
-// CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <8 x i8> @test_vshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vshl_u8
return vshl_u8(a, b);
-// CHECK: ushl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4
+// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vshl_u16
return vshl_u16(a, b);
-// CHECK: ushl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4
+// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vshl_u32
return vshl_u32(a, b);
-// CHECK: ushl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4
+// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
-// CHECK-LABEL: test_vshl_u64
return vshl_u64(a, b);
-// CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <16 x i8> @test_vshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
-// CHECK-LABEL: test_vshlq_s8
return vshlq_s8(a, b);
-// CHECK: sshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
+// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vshlq_s16
return vshlq_s16(a, b);
-// CHECK: sshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
+// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vshlq_s32
return vshlq_s32(a, b);
-// CHECK: sshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
+// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
-// CHECK-LABEL: test_vshlq_s64
return vshlq_s64(a, b);
-// CHECK: sshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
-// CHECK-LABEL: test_vshlq_u8
return vshlq_u8(a, b);
-// CHECK: ushl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
+// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vshlq_u16
return vshlq_u16(a, b);
-// CHECK: ushl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
+// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vshlq_u32
return vshlq_u32(a, b);
-// CHECK: ushl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
+// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
-// CHECK-LABEL: test_vshlq_u64
return vshlq_u64(a, b);
-// CHECK: ushl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vqshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vqshl_s8
return vqshl_s8(a, b);
-// CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vqshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vqshl_s16
return vqshl_s16(a, b);
-// CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vqshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vqshl_s32
return vqshl_s32(a, b);
-// CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vqshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
-// CHECK-LABEL: test_vqshl_s64
return vqshl_s64(a, b);
-// CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <8 x i8> @test_vqshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vqshl_u8
return vqshl_u8(a, b);
-// CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vqshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vqshl_u16
return vqshl_u16(a, b);
-// CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vqshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vqshl_u32
return vqshl_u32(a, b);
-// CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vqshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
+// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
-// CHECK-LABEL: test_vqshl_u64
return vqshl_u64(a, b);
-// CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <16 x i8> @test_vqshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
-// CHECK-LABEL: test_vqshlq_s8
return vqshlq_s8(a, b);
-// CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vqshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vqshlq_s16
return vqshlq_s16(a, b);
-// CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vqshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vqshlq_s32
return vqshlq_s32(a, b);
-// CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vqshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
-// CHECK-LABEL: test_vqshlq_s64
return vqshlq_s64(a, b);
-// CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vqshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
-// CHECK-LABEL: test_vqshlq_u8
return vqshlq_u8(a, b);
-// CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vqshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vqshlq_u16
return vqshlq_u16(a, b);
-// CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vqshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vqshlq_u32
return vqshlq_u32(a, b);
-// CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vqshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
+// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
-// CHECK-LABEL: test_vqshlq_u64
return vqshlq_u64(a, b);
-// CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vrshl_s8
return vrshl_s8(a, b);
-// CHECK: srshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
+// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vrshl_s16
return vrshl_s16(a, b);
-// CHECK: srshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
+// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vrshl_s32
return vrshl_s32(a, b);
-// CHECK: srshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
+// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
-// CHECK-LABEL: test_vrshl_s64
return vrshl_s64(a, b);
-// CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <8 x i8> @test_vrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vrshl_u8
return vrshl_u8(a, b);
-// CHECK: urshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
+// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vrshl_u16
return vrshl_u16(a, b);
-// CHECK: urshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
+// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vrshl_u32
return vrshl_u32(a, b);
-// CHECK: urshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
+// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
-// CHECK-LABEL: test_vrshl_u64
return vrshl_u64(a, b);
-// CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <16 x i8> @test_vrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
-// CHECK-LABEL: test_vrshlq_s8
return vrshlq_s8(a, b);
-// CHECK: srshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
+// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vrshlq_s16
return vrshlq_s16(a, b);
-// CHECK: srshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
+// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vrshlq_s32
return vrshlq_s32(a, b);
-// CHECK: srshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
+// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
-// CHECK-LABEL: test_vrshlq_s64
return vrshlq_s64(a, b);
-// CHECK: srshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
-// CHECK-LABEL: test_vrshlq_u8
return vrshlq_u8(a, b);
-// CHECK: urshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
+// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vrshlq_u16
return vrshlq_u16(a, b);
-// CHECK: urshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
+// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vrshlq_u32
return vrshlq_u32(a, b);
-// CHECK: urshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
+// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
-// CHECK-LABEL: test_vrshlq_u64
return vrshlq_u64(a, b);
-// CHECK: urshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vqrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vqrshl_s8
return vqrshl_s8(a, b);
-// CHECK: sqrshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vqrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vqrshl_s16
return vqrshl_s16(a, b);
-// CHECK: sqrshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vqrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vqrshl_s32
return vqrshl_s32(a, b);
-// CHECK: sqrshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vqrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
-// CHECK-LABEL: test_vqrshl_s64
return vqrshl_s64(a, b);
-// CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <8 x i8> @test_vqrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vqrshl_u8
return vqrshl_u8(a, b);
-// CHECK: uqrshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vqrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vqrshl_u16
return vqrshl_u16(a, b);
-// CHECK: uqrshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vqrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vqrshl_u32
return vqrshl_u32(a, b);
-// CHECK: uqrshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <1 x i64> @test_vqrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
+// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
+// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
-// CHECK-LABEL: test_vqrshl_u64
return vqrshl_u64(a, b);
-// CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <16 x i8> @test_vqrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
-// CHECK-LABEL: test_vqrshlq_s8
return vqrshlq_s8(a, b);
-// CHECK: sqrshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vqrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vqrshlq_s16
return vqrshlq_s16(a, b);
-// CHECK: sqrshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vqrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vqrshlq_s32
return vqrshlq_s32(a, b);
-// CHECK: sqrshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vqrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
-// CHECK-LABEL: test_vqrshlq_s64
return vqrshlq_s64(a, b);
-// CHECK: sqrshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
-// CHECK-LABEL: test_vqrshlq_u8
+// CHECK-LABEL: define <16 x i8> @test_vqrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
return vqrshlq_u8(a, b);
-// CHECK: uqrshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vqrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vqrshlq_u16
return vqrshlq_u16(a, b);
-// CHECK: uqrshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vqrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vqrshlq_u32
return vqrshlq_u32(a, b);
-// CHECK: uqrshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i64> @test_vqrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
+// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
-// CHECK-LABEL: test_vqrshlq_u64
return vqrshlq_u64(a, b);
-// CHECK: uqrshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <1 x i64> @test_vsli_n_p64(<1 x i64> %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N]]1, i32 0)
+// CHECK: ret <1 x i64> [[VSLI_N]]2
poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
-// CHECK-LABEL: test_vsli_n_p64
return vsli_n_p64(a, b, 0);
-// CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #0
}
+// CHECK-LABEL: define <2 x i64> @test_vsliq_n_p64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N]]1, i32 0)
+// CHECK: ret <2 x i64> [[VSLI_N]]2
poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
-// CHECK-LABEL: test_vsliq_n_p64
return vsliq_n_p64(a, b, 0);
-// CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
}
+// CHECK-LABEL: define <8 x i8> @test_vmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VMAX_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vmax_s8
return vmax_s8(a, b);
-// CHECK: smax {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4
+// CHECK: ret <4 x i16> [[VMAX2_I]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vmax_s16
return vmax_s16(a, b);
-// CHECK: smax {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4
+// CHECK: ret <2 x i32> [[VMAX2_I]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vmax_s32
return vmax_s32(a, b);
-// CHECK: smax {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VMAX_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
-// CHECK-LABEL: test_vmax_u8
return vmax_u8(a, b);
-// CHECK: umax {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4
+// CHECK: ret <4 x i16> [[VMAX2_I]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
-// CHECK-LABEL: test_vmax_u16
return vmax_u16(a, b);
-// CHECK: umax {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4
+// CHECK: ret <2 x i32> [[VMAX2_I]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
-// CHECK-LABEL: test_vmax_u32
return vmax_u32(a, b);
-// CHECK: umax {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <2 x float> @test_vmax_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> [[VMAX_I]], <2 x float> [[VMAX1_I]]) #4
+// CHECK: ret <2 x float> [[VMAX2_I]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
-// CHECK-LABEL: test_vmax_f32
return vmax_f32(a, b);
-// CHECK: fmax {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VMAX_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
-// CHECK-LABEL: test_vmaxq_s8
return vmaxq_s8(a, b);
-// CHECK: smax {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4
+// CHECK: ret <8 x i16> [[VMAX2_I]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vmaxq_s16
return vmaxq_s16(a, b);
-// CHECK: smax {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4
+// CHECK: ret <4 x i32> [[VMAX2_I]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vmaxq_s32
return vmaxq_s32(a, b);
-// CHECK: smax {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VMAX_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
-// CHECK-LABEL: test_vmaxq_u8
return vmaxq_u8(a, b);
-// CHECK: umax {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4
+// CHECK: ret <8 x i16> [[VMAX2_I]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
-// CHECK-LABEL: test_vmaxq_u16
return vmaxq_u16(a, b);
-// CHECK: umax {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4
+// CHECK: ret <4 x i32> [[VMAX2_I]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
-// CHECK-LABEL: test_vmaxq_u32
return vmaxq_u32(a, b);
-// CHECK: umax {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x float> @test_vmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> [[VMAX_I]], <4 x float> [[VMAX1_I]]) #4
+// CHECK: ret <4 x float> [[VMAX2_I]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
-// CHECK-LABEL: test_vmaxq_f32
return vmaxq_f32(a, b);
-// CHECK: fmax {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vmaxq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> [[VMAX_I]], <2 x double> [[VMAX1_I]]) #4
+// CHECK: ret <2 x double> [[VMAX2_I]]
float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
-// CHECK-LABEL: test_vmaxq_f64
return vmaxq_f64(a, b);
-// CHECK: fmax {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VMIN_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vmin_s8
return vmin_s8(a, b);
-// CHECK: smin {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4
+// CHECK: ret <4 x i16> [[VMIN2_I]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vmin_s16
return vmin_s16(a, b);
-// CHECK: smin {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4
+// CHECK: ret <2 x i32> [[VMIN2_I]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vmin_s32
return vmin_s32(a, b);
-// CHECK: smin {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VMIN_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
-// CHECK-LABEL: test_vmin_u8
return vmin_u8(a, b);
-// CHECK: umin {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4
+// CHECK: ret <4 x i16> [[VMIN2_I]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
-// CHECK-LABEL: test_vmin_u16
return vmin_u16(a, b);
-// CHECK: umin {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4
+// CHECK: ret <2 x i32> [[VMIN2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
-// CHECK-LABEL: test_vmin_u32
return vmin_u32(a, b);
-// CHECK: umin {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <2 x float> @test_vmin_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> [[VMIN_I]], <2 x float> [[VMIN1_I]]) #4
+// CHECK: ret <2 x float> [[VMIN2_I]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
-// CHECK-LABEL: test_vmin_f32
return vmin_f32(a, b);
-// CHECK: fmin {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VMIN_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
-// CHECK-LABEL: test_vminq_s8
return vminq_s8(a, b);
-// CHECK: smin {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4
+// CHECK: ret <8 x i16> [[VMIN2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vminq_s16
return vminq_s16(a, b);
-// CHECK: smin {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vminq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4
+// CHECK: ret <4 x i32> [[VMIN2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vminq_s32
return vminq_s32(a, b);
-// CHECK: smin {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VMIN_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
-// CHECK-LABEL: test_vminq_u8
return vminq_u8(a, b);
-// CHECK: umin {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vminq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4
+// CHECK: ret <8 x i16> [[VMIN2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
-// CHECK-LABEL: test_vminq_u16
return vminq_u16(a, b);
-// CHECK: umin {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4
+// CHECK: ret <4 x i32> [[VMIN2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
-// CHECK-LABEL: test_vminq_u32
return vminq_u32(a, b);
-// CHECK: umin {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x float> @test_vminq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> [[VMIN_I]], <4 x float> [[VMIN1_I]]) #4
+// CHECK: ret <4 x float> [[VMIN2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
-// CHECK-LABEL: test_vminq_f32
return vminq_f32(a, b);
-// CHECK: fmin {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vminq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> [[VMIN_I]], <2 x double> [[VMIN1_I]]) #4
+// CHECK: ret <2 x double> [[VMIN2_I]]
float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
-// CHECK-LABEL: test_vminq_f64
return vminq_f64(a, b);
-// CHECK: fmin {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x float> @test_vmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> [[VMAXNM_I]], <2 x float> [[VMAXNM1_I]]) #4
+// CHECK: ret <2 x float> [[VMAXNM2_I]]
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
-// CHECK-LABEL: test_vmaxnm_f32
return vmaxnm_f32(a, b);
-// CHECK: fmaxnm {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> [[VMAXNM_I]], <4 x float> [[VMAXNM1_I]]) #4
+// CHECK: ret <4 x float> [[VMAXNM2_I]]
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
-// CHECK-LABEL: test_vmaxnmq_f32
return vmaxnmq_f32(a, b);
-// CHECK: fmaxnm {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vmaxnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// CHECK: [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> [[VMAXNM_I]], <2 x double> [[VMAXNM1_I]]) #4
+// CHECK: ret <2 x double> [[VMAXNM2_I]]
float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
-// CHECK-LABEL: test_vmaxnmq_f64
return vmaxnmq_f64(a, b);
-// CHECK: fmaxnm {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x float> @test_vminnm_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> [[VMINNM_I]], <2 x float> [[VMINNM1_I]]) #4
+// CHECK: ret <2 x float> [[VMINNM2_I]]
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
-// CHECK-LABEL: test_vminnm_f32
return vminnm_f32(a, b);
-// CHECK: fminnm {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> [[VMINNM_I]], <4 x float> [[VMINNM1_I]]) #4
+// CHECK: ret <4 x float> [[VMINNM2_I]]
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
-// CHECK-LABEL: test_vminnmq_f32
return vminnmq_f32(a, b);
-// CHECK: fminnm {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vminnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// CHECK: [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> [[VMINNM_I]], <2 x double> [[VMINNM1_I]]) #4
+// CHECK: ret <2 x double> [[VMINNM2_I]]
float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
-// CHECK-LABEL: test_vminnmq_f64
return vminnmq_f64(a, b);
-// CHECK: fminnm {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vpmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VPMAX_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vpmax_s8
return vpmax_s8(a, b);
-// CHECK: smaxp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vpmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4
+// CHECK: ret <4 x i16> [[VPMAX2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vpmax_s16
return vpmax_s16(a, b);
-// CHECK: smaxp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vpmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4
+// CHECK: ret <2 x i32> [[VPMAX2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vpmax_s32
return vpmax_s32(a, b);
-// CHECK: smaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vpmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VPMAX_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
-// CHECK-LABEL: test_vpmax_u8
return vpmax_u8(a, b);
-// CHECK: umaxp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vpmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4
+// CHECK: ret <4 x i16> [[VPMAX2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
-// CHECK-LABEL: test_vpmax_u16
return vpmax_u16(a, b);
-// CHECK: umaxp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vpmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4
+// CHECK: ret <2 x i32> [[VPMAX2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
-// CHECK-LABEL: test_vpmax_u32
return vpmax_u32(a, b);
-// CHECK: umaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <2 x float> @test_vpmax_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[VPMAX_I]], <2 x float> [[VPMAX1_I]]) #4
+// CHECK: ret <2 x float> [[VPMAX2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
-// CHECK-LABEL: test_vpmax_f32
return vpmax_f32(a, b);
-// CHECK: fmaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vpmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VPMAX_I]]
int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
-// CHECK-LABEL: test_vpmaxq_s8
return vpmaxq_s8(a, b);
-// CHECK: smaxp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vpmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) #4
+// CHECK: ret <8 x i16> [[VPMAX2_I]]
int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vpmaxq_s16
return vpmaxq_s16(a, b);
-// CHECK: smaxp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vpmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) #4
+// CHECK: ret <4 x i32> [[VPMAX2_I]]
int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vpmaxq_s32
return vpmaxq_s32(a, b);
-// CHECK: smaxp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vpmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VPMAX_I]]
uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
-// CHECK-LABEL: test_vpmaxq_u8
return vpmaxq_u8(a, b);
-// CHECK: umaxp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vpmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) #4
+// CHECK: ret <8 x i16> [[VPMAX2_I]]
uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
-// CHECK-LABEL: test_vpmaxq_u16
return vpmaxq_u16(a, b);
-// CHECK: umaxp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vpmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) #4
+// CHECK: ret <4 x i32> [[VPMAX2_I]]
uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
-// CHECK-LABEL: test_vpmaxq_u32
return vpmaxq_u32(a, b);
-// CHECK: umaxp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x float> @test_vpmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> [[VPMAX_I]], <4 x float> [[VPMAX1_I]]) #4
+// CHECK: ret <4 x float> [[VPMAX2_I]]
float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
-// CHECK-LABEL: test_vpmaxq_f32
return vpmaxq_f32(a, b);
-// CHECK: fmaxp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vpmaxq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> [[VPMAX_I]], <2 x double> [[VPMAX1_I]]) #4
+// CHECK: ret <2 x double> [[VPMAX2_I]]
float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
-// CHECK-LABEL: test_vpmaxq_f64
return vpmaxq_f64(a, b);
-// CHECK: fmaxp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vpmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VPMIN_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vpmin_s8
return vpmin_s8(a, b);
-// CHECK: sminp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vpmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4
+// CHECK: ret <4 x i16> [[VPMIN2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vpmin_s16
return vpmin_s16(a, b);
-// CHECK: sminp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vpmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4
+// CHECK: ret <2 x i32> [[VPMIN2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vpmin_s32
return vpmin_s32(a, b);
-// CHECK: sminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vpmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VPMIN_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
-// CHECK-LABEL: test_vpmin_u8
return vpmin_u8(a, b);
-// CHECK: uminp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vpmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4
+// CHECK: ret <4 x i16> [[VPMIN2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
-// CHECK-LABEL: test_vpmin_u16
return vpmin_u16(a, b);
-// CHECK: uminp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vpmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4
+// CHECK: ret <2 x i32> [[VPMIN2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
-// CHECK-LABEL: test_vpmin_u32
return vpmin_u32(a, b);
-// CHECK: uminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <2 x float> @test_vpmin_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> [[VPMIN_I]], <2 x float> [[VPMIN1_I]]) #4
+// CHECK: ret <2 x float> [[VPMIN2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
-// CHECK-LABEL: test_vpmin_f32
return vpmin_f32(a, b);
-// CHECK: fminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vpminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VPMIN_I]]
int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
-// CHECK-LABEL: test_vpminq_s8
return vpminq_s8(a, b);
-// CHECK: sminp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vpminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]]) #4
+// CHECK: ret <8 x i16> [[VPMIN2_I]]
int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vpminq_s16
return vpminq_s16(a, b);
-// CHECK: sminp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vpminq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]]) #4
+// CHECK: ret <4 x i32> [[VPMIN2_I]]
int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vpminq_s32
return vpminq_s32(a, b);
-// CHECK: sminp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vpminq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VPMIN_I]]
uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
-// CHECK-LABEL: test_vpminq_u8
return vpminq_u8(a, b);
-// CHECK: uminp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vpminq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]]) #4
+// CHECK: ret <8 x i16> [[VPMIN2_I]]
uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
-// CHECK-LABEL: test_vpminq_u16
return vpminq_u16(a, b);
-// CHECK: uminp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vpminq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]]) #4
+// CHECK: ret <4 x i32> [[VPMIN2_I]]
uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
-// CHECK-LABEL: test_vpminq_u32
return vpminq_u32(a, b);
-// CHECK: uminp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x float> @test_vpminq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> [[VPMIN_I]], <4 x float> [[VPMIN1_I]]) #4
+// CHECK: ret <4 x float> [[VPMIN2_I]]
float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
-// CHECK-LABEL: test_vpminq_f32
return vpminq_f32(a, b);
-// CHECK: fminp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vpminq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> [[VPMIN_I]], <2 x double> [[VPMIN1_I]]) #4
+// CHECK: ret <2 x double> [[VPMIN2_I]]
float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
-// CHECK-LABEL: test_vpminq_f64
return vpminq_f64(a, b);
-// CHECK: fminp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x float> @test_vpmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VPMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> [[VPMAXNM_I]], <2 x float> [[VPMAXNM1_I]]) #4
+// CHECK: ret <2 x float> [[VPMAXNM2_I]]
float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) {
-// CHECK-LABEL: test_vpmaxnm_f32
return vpmaxnm_f32(a, b);
-// CHECK: fmaxnmp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <4 x float> @test_vpmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> [[VPMAXNM_I]], <4 x float> [[VPMAXNM1_I]]) #4
+// CHECK: ret <4 x float> [[VPMAXNM2_I]]
float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
-// CHECK-LABEL: test_vpmaxnmq_f32
return vpmaxnmq_f32(a, b);
-// CHECK: fmaxnmp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vpmaxnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// CHECK: [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> [[VPMAXNM_I]], <2 x double> [[VPMAXNM1_I]]) #4
+// CHECK: ret <2 x double> [[VPMAXNM2_I]]
float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
-// CHECK-LABEL: test_vpmaxnmq_f64
return vpmaxnmq_f64(a, b);
-// CHECK: fmaxnmp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <2 x float> @test_vpminnm_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VPMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VPMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> [[VPMINNM_I]], <2 x float> [[VPMINNM1_I]]) #4
+// CHECK: ret <2 x float> [[VPMINNM2_I]]
float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
-// CHECK-LABEL: test_vpminnm_f32
return vpminnm_f32(a, b);
-// CHECK: fminnmp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <4 x float> @test_vpminnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> [[VPMINNM_I]], <4 x float> [[VPMINNM1_I]]) #4
+// CHECK: ret <4 x float> [[VPMINNM2_I]]
float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
-// CHECK-LABEL: test_vpminnmq_f32
return vpminnmq_f32(a, b);
-// CHECK: fminnmp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vpminnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// CHECK: [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> [[VPMINNM_I]], <2 x double> [[VPMINNM1_I]]) #4
+// CHECK: ret <2 x double> [[VPMINNM2_I]]
float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
-// CHECK-LABEL: test_vpminnmq_f64
return vpminnmq_f64(a, b);
-// CHECK: fminnmp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vpadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
-// CHECK-LABEL: test_vpadd_s8
return vpadd_s8(a, b);
-// CHECK: addp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vpadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
+// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vpadd_s16
return vpadd_s16(a, b);
-// CHECK: addp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vpadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
+// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vpadd_s32
return vpadd_s32(a, b);
-// CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i8> @test_vpadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
-// CHECK-LABEL: test_vpadd_u8
return vpadd_u8(a, b);
-// CHECK: addp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i16> @test_vpadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
+// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
-// CHECK-LABEL: test_vpadd_u16
return vpadd_u16(a, b);
-// CHECK: addp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vpadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
+// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
-// CHECK-LABEL: test_vpadd_u32
return vpadd_u32(a, b);
-// CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <2 x float> @test_vpadd_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> [[VPADD_V_I]], <2 x float> [[VPADD_V1_I]]) #4
+// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x float>
+// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
-// CHECK-LABEL: test_vpadd_f32
return vpadd_f32(a, b);
-// CHECK: faddp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <16 x i8> @test_vpaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VPADDQ_V_I]]
int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
-// CHECK-LABEL: test_vpaddq_s8
return vpaddq_s8(a, b);
-// CHECK: addp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vpaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]]) #4
+// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vpaddq_s16
return vpaddq_s16(a, b);
-// CHECK: addp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vpaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]]) #4
+// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vpaddq_s32
return vpaddq_s32(a, b);
-// CHECK: addp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <16 x i8> @test_vpaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+// CHECK: ret <16 x i8> [[VPADDQ_V_I]]
uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) {
-// CHECK-LABEL: test_vpaddq_u8
return vpaddq_u8(a, b);
-// CHECK: addp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <8 x i16> @test_vpaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]]) #4
+// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) {
-// CHECK-LABEL: test_vpaddq_u16
return vpaddq_u16(a, b);
-// CHECK: addp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vpaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]]) #4
+// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
-// CHECK-LABEL: test_vpaddq_u32
return vpaddq_u32(a, b);
-// CHECK: addp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x float> @test_vpaddq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> [[VPADDQ_V_I]], <4 x float> [[VPADDQ_V1_I]]) #4
+// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x float>
+// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
-// CHECK-LABEL: test_vpaddq_f32
return vpaddq_f32(a, b);
-// CHECK: faddp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vpaddq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> [[VPADDQ_V_I]], <2 x double> [[VPADDQ_V1_I]]) #4
+// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x double>
+// CHECK: ret <2 x double> [[TMP2]]
float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
-// CHECK-LABEL: test_vpaddq_f64
return vpaddq_f64(a, b);
-// CHECK: faddp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <4 x i16> @test_vqdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4
+// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vqdmulh_s16
return vqdmulh_s16(a, b);
-// CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vqdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4
+// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vqdmulh_s32
return vqdmulh_s32(a, b);
-// CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4
+// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vqdmulhq_s16
return vqdmulhq_s16(a, b);
-// CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4
+// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vqdmulhq_s32
return vqdmulhq_s32(a, b);
-// CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4
+// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
-// CHECK-LABEL: test_vqrdmulh_s16
return vqrdmulh_s16(a, b);
-// CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4
+// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
-// CHECK-LABEL: test_vqrdmulh_s32
return vqrdmulh_s32(a, b);
-// CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4
+// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
-// CHECK-LABEL: test_vqrdmulhq_s16
return vqrdmulhq_s16(a, b);
-// CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4
+// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
-// CHECK-LABEL: test_vqrdmulhq_s32
return vqrdmulhq_s32(a, b);
-// CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x float> @test_vmulx_f32(<2 x float> %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #4
+// CHECK: ret <2 x float> [[VMULX2_I]]
float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
-// CHECK-LABEL: test_vmulx_f32
return vmulx_f32(a, b);
-// CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <4 x float> @test_vmulxq_f32(<4 x float> %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #4
+// CHECK: ret <4 x float> [[VMULX2_I]]
float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
-// CHECK-LABEL: test_vmulxq_f32
return vmulxq_f32(a, b);
-// CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x double> @test_vmulxq_f64(<2 x double> %a, <2 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #4
+// CHECK: ret <2 x double> [[VMULX2_I]]
float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
-// CHECK-LABEL: test_vmulxq_f64
return vmulxq_f64(a, b);
-// CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vshl_n_s8(<8 x i8> %a) #0 {
+// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// CHECK: ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) {
-// CHECK-LABEL: test_vshl_n_s8
return vshl_n_s8(a, 3);
-// CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vshl_n_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
+// CHECK: ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_s16(int16x4_t a) {
-// CHECK-LABEL: test_vshl_n_s16
return vshl_n_s16(a, 3);
-// CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vshl_n_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
+// CHECK: ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_s32(int32x2_t a) {
-// CHECK-LABEL: test_vshl_n_s32
return vshl_n_s32(a, 3);
-// CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) #0 {
+// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// CHECK: ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_s8(int8x16_t a) {
-// CHECK-LABEL: test_vshlq_n_s8
return vshlq_n_s8(a, 3);
-// CHECK: shl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// CHECK: ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_s16(int16x8_t a) {
-// CHECK-LABEL: test_vshlq_n_s16
return vshlq_n_s16(a, 3);
-// CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
+// CHECK: ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_s32(int32x4_t a) {
-// CHECK-LABEL: test_vshlq_n_s32
return vshlq_n_s32(a, 3);
-// CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vshlq_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
+// CHECK: ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_s64(int64x2_t a) {
-// CHECK-LABEL: test_vshlq_n_s64
return vshlq_n_s64(a, 3);
-// CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vshl_n_u8(<8 x i8> %a) #0 {
+// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// CHECK: ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_u8(int8x8_t a) {
-// CHECK-LABEL: test_vshl_n_u8
return vshl_n_u8(a, 3);
-// CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vshl_n_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
+// CHECK: ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_u16(int16x4_t a) {
-// CHECK-LABEL: test_vshl_n_u16
return vshl_n_u16(a, 3);
-// CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vshl_n_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
+// CHECK: ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_u32(int32x2_t a) {
-// CHECK-LABEL: test_vshl_n_u32
return vshl_n_u32(a, 3);
-// CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vshlq_n_u8(<16 x i8> %a) #0 {
+// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// CHECK: ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_u8(int8x16_t a) {
-// CHECK-LABEL: test_vshlq_n_u8
return vshlq_n_u8(a, 3);
-// CHECK: shl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vshlq_n_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// CHECK: ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_u16(int16x8_t a) {
-// CHECK-LABEL: test_vshlq_n_u16
return vshlq_n_u16(a, 3);
-// CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vshlq_n_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
+// CHECK: ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_u32(int32x4_t a) {
-// CHECK-LABEL: test_vshlq_n_u32
return vshlq_n_u32(a, 3);
-// CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vshlq_n_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
+// CHECK: ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_u64(int64x2_t a) {
-// CHECK-LABEL: test_vshlq_n_u64
return vshlq_n_u64(a, 3);
-// CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) #0 {
+// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// CHECK: ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) {
- // CHECK-LABEL: test_vshr_n_s8
return vshr_n_s8(a, 3);
- // CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
+// CHECK: ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_s16(int16x4_t a) {
- // CHECK-LABEL: test_vshr_n_s16
return vshr_n_s16(a, 3);
- // CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 3, i32 3>
+// CHECK: ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_s32(int32x2_t a) {
- // CHECK-LABEL: test_vshr_n_s32
return vshr_n_s32(a, 3);
- // CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) #0 {
+// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// CHECK: ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_s8(int8x16_t a) {
- // CHECK-LABEL: test_vshrq_n_s8
return vshrq_n_s8(a, 3);
- // CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// CHECK: ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_s16(int16x8_t a) {
- // CHECK-LABEL: test_vshrq_n_s16
return vshrq_n_s16(a, 3);
- // CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
+// CHECK: ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_s32(int32x4_t a) {
- // CHECK-LABEL: test_vshrq_n_s32
return vshrq_n_s32(a, 3);
- // CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 3, i64 3>
+// CHECK: ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_s64(int64x2_t a) {
- // CHECK-LABEL: test_vshrq_n_s64
return vshrq_n_s64(a, 3);
- // CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) #0 {
+// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// CHECK: ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_u8(int8x8_t a) {
- // CHECK-LABEL: test_vshr_n_u8
return vshr_n_u8(a, 3);
- // CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
+// CHECK: ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_u16(int16x4_t a) {
- // CHECK-LABEL: test_vshr_n_u16
return vshr_n_u16(a, 3);
- // CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 3, i32 3>
+// CHECK: ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_u32(int32x2_t a) {
- // CHECK-LABEL: test_vshr_n_u32
return vshr_n_u32(a, 3);
- // CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) #0 {
+// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// CHECK: ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_u8(int8x16_t a) {
- // CHECK-LABEL: test_vshrq_n_u8
return vshrq_n_u8(a, 3);
- // CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// CHECK: ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_u16(int16x8_t a) {
- // CHECK-LABEL: test_vshrq_n_u16
return vshrq_n_u16(a, 3);
- // CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
+// CHECK: ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_u32(int32x4_t a) {
- // CHECK-LABEL: test_vshrq_n_u32
return vshrq_n_u32(a, 3);
- // CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 3, i64 3>
+// CHECK: ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_u64(int64x2_t a) {
- // CHECK-LABEL: test_vshrq_n_u64
return vshrq_n_u64(a, 3);
- // CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
- // CHECK-LABEL: test_vsra_n_s8
return vsra_n_s8(a, b, 3);
- // CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
+// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
+// CHECK: ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
- // CHECK-LABEL: test_vsra_n_s16
return vsra_n_s16(a, b, 3);
- // CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 3, i32 3>
+// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
+// CHECK: ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
- // CHECK-LABEL: test_vsra_n_s32
return vsra_n_s32(a, b, 3);
- // CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
+// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
- // CHECK-LABEL: test_vsraq_n_s8
return vsraq_n_s8(a, b, 3);
- // CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
+// CHECK: ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vsraq_n_s16
return vsraq_n_s16(a, b, 3);
- // CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
+// CHECK: ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vsraq_n_s32
return vsraq_n_s32(a, b, 3);
- // CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 3, i64 3>
+// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
+// CHECK: ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vsraq_n_s64
return vsraq_n_s64(a, b, 3);
- // CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_u8(int8x8_t a, int8x8_t b) {
- // CHECK-LABEL: test_vsra_n_u8
return vsra_n_u8(a, b, 3);
- // CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
+// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
+// CHECK: ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_u16(int16x4_t a, int16x4_t b) {
- // CHECK-LABEL: test_vsra_n_u16
return vsra_n_u16(a, b, 3);
- // CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 3, i32 3>
+// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
+// CHECK: ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_u32(int32x2_t a, int32x2_t b) {
- // CHECK-LABEL: test_vsra_n_u32
return vsra_n_u32(a, b, 3);
- // CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
+// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_u8(int8x16_t a, int8x16_t b) {
- // CHECK-LABEL: test_vsraq_n_u8
return vsraq_n_u8(a, b, 3);
- // CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
+// CHECK: ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_u16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vsraq_n_u16
return vsraq_n_u16(a, b, 3);
- // CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
+// CHECK: ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_u32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vsraq_n_u32
return vsraq_n_u32(a, b, 3);
- // CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 3, i64 3>
+// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
+// CHECK: ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_u64(int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vsraq_n_u64
return vsraq_n_u64(a, b, 3);
- // CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) #0 {
+// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
+// CHECK: ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) {
- // CHECK-LABEL: test_vrshr_n_s8
return vrshr_n_s8(a, 3);
- // CHECK: srshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
+// CHECK: ret <4 x i16> [[VRSHR_N]]1
int16x4_t test_vrshr_n_s16(int16x4_t a) {
- // CHECK-LABEL: test_vrshr_n_s16
return vrshr_n_s16(a, 3);
- // CHECK: srshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
+// CHECK: ret <2 x i32> [[VRSHR_N]]1
int32x2_t test_vrshr_n_s32(int32x2_t a) {
- // CHECK-LABEL: test_vrshr_n_s32
return vrshr_n_s32(a, 3);
- // CHECK: srshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) #0 {
+// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
+// CHECK: ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_s8(int8x16_t a) {
- // CHECK-LABEL: test_vrshrq_n_s8
return vrshrq_n_s8(a, 3);
- // CHECK: srshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
+// CHECK: ret <8 x i16> [[VRSHR_N]]1
int16x8_t test_vrshrq_n_s16(int16x8_t a) {
- // CHECK-LABEL: test_vrshrq_n_s16
return vrshrq_n_s16(a, 3);
- // CHECK: srshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
+// CHECK: ret <4 x i32> [[VRSHR_N]]1
int32x4_t test_vrshrq_n_s32(int32x4_t a) {
- // CHECK-LABEL: test_vrshrq_n_s32
return vrshrq_n_s32(a, 3);
- // CHECK: srshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
+// CHECK: ret <2 x i64> [[VRSHR_N]]1
int64x2_t test_vrshrq_n_s64(int64x2_t a) {
- // CHECK-LABEL: test_vrshrq_n_s64
return vrshrq_n_s64(a, 3);
- // CHECK: srshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) #0 {
+// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
+// CHECK: ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_u8(int8x8_t a) {
- // CHECK-LABEL: test_vrshr_n_u8
return vrshr_n_u8(a, 3);
- // CHECK: urshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
+// CHECK: ret <4 x i16> [[VRSHR_N]]1
int16x4_t test_vrshr_n_u16(int16x4_t a) {
- // CHECK-LABEL: test_vrshr_n_u16
return vrshr_n_u16(a, 3);
- // CHECK: urshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
+// CHECK: ret <2 x i32> [[VRSHR_N]]1
int32x2_t test_vrshr_n_u32(int32x2_t a) {
- // CHECK-LABEL: test_vrshr_n_u32
return vrshr_n_u32(a, 3);
- // CHECK: urshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) #0 {
+// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
+// CHECK: ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_u8(int8x16_t a) {
- // CHECK-LABEL: test_vrshrq_n_u8
return vrshrq_n_u8(a, 3);
- // CHECK: urshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
+// CHECK: ret <8 x i16> [[VRSHR_N]]1
int16x8_t test_vrshrq_n_u16(int16x8_t a) {
- // CHECK-LABEL: test_vrshrq_n_u16
return vrshrq_n_u16(a, 3);
- // CHECK: urshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
+// CHECK: ret <4 x i32> [[VRSHR_N]]1
int32x4_t test_vrshrq_n_u32(int32x4_t a) {
- // CHECK-LABEL: test_vrshrq_n_u32
return vrshrq_n_u32(a, 3);
- // CHECK: urshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
+// CHECK: ret <2 x i64> [[VRSHR_N]]1
int64x2_t test_vrshrq_n_u64(int64x2_t a) {
- // CHECK-LABEL: test_vrshrq_n_u64
return vrshrq_n_u64(a, 3);
- // CHECK: urshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
+// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
- // CHECK-LABEL: test_vrsra_n_s8
return vrsra_n_s8(a, b, 3);
- // CHECK: srsra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N]]1
+// CHECK: ret <4 x i16> [[TMP3]]
int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
- // CHECK-LABEL: test_vrsra_n_s16
return vrsra_n_s16(a, b, 3);
- // CHECK: srsra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N]]1
+// CHECK: ret <2 x i32> [[TMP3]]
int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
- // CHECK-LABEL: test_vrsra_n_s32
return vrsra_n_s32(a, b, 3);
- // CHECK: srsra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
+// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
+// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
- // CHECK-LABEL: test_vrsraq_n_s8
return vrsraq_n_s8(a, b, 3);
- // CHECK: srsra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N]]1
+// CHECK: ret <8 x i16> [[TMP3]]
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vrsraq_n_s16
return vrsraq_n_s16(a, b, 3);
- // CHECK: srsra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N]]1
+// CHECK: ret <4 x i32> [[TMP3]]
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vrsraq_n_s32
return vrsraq_n_s32(a, b, 3);
- // CHECK: srsra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N]]1
+// CHECK: ret <2 x i64> [[TMP3]]
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vrsraq_n_s64
return vrsraq_n_s64(a, b, 3);
- // CHECK: srsra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
+// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
+// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vrsra_n_u8(int8x8_t a, int8x8_t b) {
- // CHECK-LABEL: test_vrsra_n_u8
return vrsra_n_u8(a, b, 3);
- // CHECK: ursra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N]]1
+// CHECK: ret <4 x i16> [[TMP3]]
int16x4_t test_vrsra_n_u16(int16x4_t a, int16x4_t b) {
- // CHECK-LABEL: test_vrsra_n_u16
return vrsra_n_u16(a, b, 3);
- // CHECK: ursra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N]]1
+// CHECK: ret <2 x i32> [[TMP3]]
int32x2_t test_vrsra_n_u32(int32x2_t a, int32x2_t b) {
- // CHECK-LABEL: test_vrsra_n_u32
return vrsra_n_u32(a, b, 3);
- // CHECK: ursra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
+// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
+// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vrsraq_n_u8(int8x16_t a, int8x16_t b) {
- // CHECK-LABEL: test_vrsraq_n_u8
return vrsraq_n_u8(a, b, 3);
- // CHECK: ursra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N]]1
+// CHECK: ret <8 x i16> [[TMP3]]
int16x8_t test_vrsraq_n_u16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vrsraq_n_u16
return vrsraq_n_u16(a, b, 3);
- // CHECK: ursra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N]]1
+// CHECK: ret <4 x i32> [[TMP3]]
int32x4_t test_vrsraq_n_u32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vrsraq_n_u32
return vrsraq_n_u32(a, b, 3);
- // CHECK: ursra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N]]1
+// CHECK: ret <2 x i64> [[TMP3]]
int64x2_t test_vrsraq_n_u64(int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vrsraq_n_u64
return vrsraq_n_u64(a, b, 3);
- // CHECK: ursra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+// CHECK: ret <8 x i8> [[VSRI_N]]
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
- // CHECK-LABEL: test_vsri_n_s8
return vsri_n_s8(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N]]1, i32 3)
+// CHECK: ret <4 x i16> [[VSRI_N]]2
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
- // CHECK-LABEL: test_vsri_n_s16
return vsri_n_s16(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N]]1, i32 3)
+// CHECK: ret <2 x i32> [[VSRI_N]]2
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
- // CHECK-LABEL: test_vsri_n_s32
return vsri_n_s32(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+// CHECK: ret <16 x i8> [[VSRI_N]]
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
- // CHECK-LABEL: test_vsriq_n_s8
return vsriq_n_s8(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N]]1, i32 3)
+// CHECK: ret <8 x i16> [[VSRI_N]]2
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vsriq_n_s16
return vsriq_n_s16(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N]]1, i32 3)
+// CHECK: ret <4 x i32> [[VSRI_N]]2
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vsriq_n_s32
return vsriq_n_s32(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N]]1, i32 3)
+// CHECK: ret <2 x i64> [[VSRI_N]]2
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vsriq_n_s64
return vsriq_n_s64(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vsri_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+// CHECK: ret <8 x i8> [[VSRI_N]]
int8x8_t test_vsri_n_u8(int8x8_t a, int8x8_t b) {
- // CHECK-LABEL: test_vsri_n_u8
return vsri_n_u8(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vsri_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N]]1, i32 3)
+// CHECK: ret <4 x i16> [[VSRI_N]]2
int16x4_t test_vsri_n_u16(int16x4_t a, int16x4_t b) {
- // CHECK-LABEL: test_vsri_n_u16
return vsri_n_u16(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vsri_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N]]1, i32 3)
+// CHECK: ret <2 x i32> [[VSRI_N]]2
int32x2_t test_vsri_n_u32(int32x2_t a, int32x2_t b) {
- // CHECK-LABEL: test_vsri_n_u32
return vsri_n_u32(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vsriq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+// CHECK: ret <16 x i8> [[VSRI_N]]
int8x16_t test_vsriq_n_u8(int8x16_t a, int8x16_t b) {
- // CHECK-LABEL: test_vsriq_n_u8
return vsriq_n_u8(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vsriq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N]]1, i32 3)
+// CHECK: ret <8 x i16> [[VSRI_N]]2
int16x8_t test_vsriq_n_u16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vsriq_n_u16
return vsriq_n_u16(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vsriq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N]]1, i32 3)
+// CHECK: ret <4 x i32> [[VSRI_N]]2
int32x4_t test_vsriq_n_u32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vsriq_n_u32
return vsriq_n_u32(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vsriq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N]]1, i32 3)
+// CHECK: ret <2 x i64> [[VSRI_N]]2
int64x2_t test_vsriq_n_u64(int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vsriq_n_u64
return vsriq_n_u64(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+// CHECK: ret <8 x i8> [[VSRI_N]]
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
- // CHECK-LABEL: test_vsri_n_p8
return vsri_n_p8(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N]]1, i32 15)
+// CHECK: ret <4 x i16> [[VSRI_N]]2
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
- // CHECK-LABEL: test_vsri_n_p16
return vsri_n_p16(a, b, 15);
- // CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15
}
+// CHECK-LABEL: define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+// CHECK: ret <16 x i8> [[VSRI_N]]
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
- // CHECK-LABEL: test_vsriq_n_p8
return vsriq_n_p8(a, b, 3);
- // CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N]]1, i32 15)
+// CHECK: ret <8 x i16> [[VSRI_N]]2
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
- // CHECK-LABEL: test_vsriq_n_p16
return vsriq_n_p16(a, b, 15);
- // CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15
}
+// CHECK-LABEL: define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+// CHECK: ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
- // CHECK-LABEL: test_vsli_n_s8
return vsli_n_s8(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N]]1, i32 3)
+// CHECK: ret <4 x i16> [[VSLI_N]]2
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
- // CHECK-LABEL: test_vsli_n_s16
return vsli_n_s16(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N]]1, i32 3)
+// CHECK: ret <2 x i32> [[VSLI_N]]2
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
- // CHECK-LABEL: test_vsli_n_s32
return vsli_n_s32(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+// CHECK: ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
- // CHECK-LABEL: test_vsliq_n_s8
return vsliq_n_s8(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N]]1, i32 3)
+// CHECK: ret <8 x i16> [[VSLI_N]]2
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vsliq_n_s16
return vsliq_n_s16(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N]]1, i32 3)
+// CHECK: ret <4 x i32> [[VSLI_N]]2
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vsliq_n_s32
return vsliq_n_s32(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N]]1, i32 3)
+// CHECK: ret <2 x i64> [[VSLI_N]]2
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vsliq_n_s64
return vsliq_n_s64(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vsli_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+// CHECK: ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
- // CHECK-LABEL: test_vsli_n_u8
return vsli_n_u8(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vsli_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N]]1, i32 3)
+// CHECK: ret <4 x i16> [[VSLI_N]]2
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
- // CHECK-LABEL: test_vsli_n_u16
return vsli_n_u16(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vsli_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N]]1, i32 3)
+// CHECK: ret <2 x i32> [[VSLI_N]]2
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
- // CHECK-LABEL: test_vsli_n_u32
return vsli_n_u32(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vsliq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+// CHECK: ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
- // CHECK-LABEL: test_vsliq_n_u8
return vsliq_n_u8(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vsliq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N]]1, i32 3)
+// CHECK: ret <8 x i16> [[VSLI_N]]2
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vsliq_n_u16
return vsliq_n_u16(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vsliq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N]]1, i32 3)
+// CHECK: ret <4 x i32> [[VSLI_N]]2
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vsliq_n_u32
return vsliq_n_u32(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vsliq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N]]1, i32 3)
+// CHECK: ret <2 x i64> [[VSLI_N]]2
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
- // CHECK-LABEL: test_vsliq_n_u64
return vsliq_n_u64(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+// CHECK: ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
- // CHECK-LABEL: test_vsli_n_p8
return vsli_n_p8(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N]]1, i32 15)
+// CHECK: ret <4 x i16> [[VSLI_N]]2
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
- // CHECK-LABEL: test_vsli_n_p16
return vsli_n_p16(a, b, 15);
- // CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15
}
+// CHECK-LABEL: define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+// CHECK: ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
- // CHECK-LABEL: test_vsliq_n_p8
return vsliq_n_p8(a, b, 3);
- // CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N]]1, i32 15)
+// CHECK: ret <8 x i16> [[VSLI_N]]2
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
- // CHECK-LABEL: test_vsliq_n_p16
return vsliq_n_p16(a, b, 15);
- // CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15
}
+// CHECK-LABEL: define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) #0 {
+// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+// CHECK: ret <8 x i8> [[VQSHLU_N]]
int8x8_t test_vqshlu_n_s8(int8x8_t a) {
- // CHECK-LABEL: test_vqshlu_n_s8
return vqshlu_n_s8(a, 3);
- // CHECK: sqshlu {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
+// CHECK: ret <4 x i16> [[VQSHLU_N]]1
int16x4_t test_vqshlu_n_s16(int16x4_t a) {
- // CHECK-LABEL: test_vqshlu_n_s16
return vqshlu_n_s16(a, 3);
- // CHECK: sqshlu {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
}
+// CHECK-LABEL: define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
+// CHECK: ret <2 x i32> [[VQSHLU_N]]1
int32x2_t test_vqshlu_n_s32(int32x2_t a) {
- // CHECK-LABEL: test_vqshlu_n_s32
return vqshlu_n_s32(a, 3);
- // CHECK: sqshlu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
}
+// CHECK-LABEL: define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) #0 {
+// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+// CHECK: ret <16 x i8> [[VQSHLU_N]]
int8x16_t test_vqshluq_n_s8(int8x16_t a) {
- // CHECK-LABEL: test_vqshluq_n_s8
return vqshluq_n_s8(a, 3);
- // CHECK: sqshlu {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+// CHECK: ret <8 x i16> [[VQSHLU_N]]1
int16x8_t test_vqshluq_n_s16(int16x8_t a) {
- // CHECK-LABEL: test_vqshluq_n_s16
return vqshluq_n_s16(a, 3);
- // CHECK: sqshlu {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+// CHECK: ret <4 x i32> [[VQSHLU_N]]1
int32x4_t test_vqshluq_n_s32(int32x4_t a) {
- // CHECK-LABEL: test_vqshluq_n_s32
return vqshluq_n_s32(a, 3);
- // CHECK: sqshlu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
}
+// CHECK-LABEL: define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
+// CHECK: ret <2 x i64> [[VQSHLU_N]]1
int64x2_t test_vqshluq_n_s64(int64x2_t a) {
- // CHECK-LABEL: test_vqshluq_n_s64
return vqshluq_n_s64(a, 3);
- // CHECK: sqshlu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
}
+// CHECK-LABEL: define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+// CHECK: ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
- // CHECK-LABEL: test_vshrn_n_s16
return vshrn_n_s16(a, 3);
- // CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
+// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+// CHECK: ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
- // CHECK-LABEL: test_vshrn_n_s32
return vshrn_n_s32(a, 9);
- // CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
+// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+// CHECK: ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
- // CHECK-LABEL: test_vshrn_n_s64
return vshrn_n_s64(a, 19);
- // CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+// CHECK: ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
- // CHECK-LABEL: test_vshrn_n_u16
return vshrn_n_u16(a, 3);
- // CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
+// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+// CHECK: ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
- // CHECK-LABEL: test_vshrn_n_u32
return vshrn_n_u32(a, 9);
- // CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
+// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+// CHECK: ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
- // CHECK-LABEL: test_vshrn_n_u64
return vshrn_n_u64(a, 19);
- // CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vshrn_high_n_s16
return vshrn_high_n_s16(a, b, 3);
- // CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
+// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vshrn_high_n_s32
return vshrn_high_n_s32(a, b, 9);
- // CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
+// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vshrn_high_n_s64
return vshrn_high_n_s64(a, b, 19);
- // CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vshrn_high_n_u16
return vshrn_high_n_u16(a, b, 3);
- // CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
+// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vshrn_high_n_u32
return vshrn_high_n_u32(a, b, 9);
- // CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
+// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
- // CHECK-LABEL: test_vshrn_high_n_u64
return vshrn_high_n_u64(a, b, 19);
- // CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
+// CHECK: ret <8 x i8> [[VQSHRUN_N]]1
int8x8_t test_vqshrun_n_s16(int16x8_t a) {
- // CHECK-LABEL: test_vqshrun_n_s16
return vqshrun_n_s16(a, 3);
- // CHECK: sqshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
+// CHECK: ret <4 x i16> [[VQSHRUN_N]]1
int16x4_t test_vqshrun_n_s32(int32x4_t a) {
- // CHECK-LABEL: test_vqshrun_n_s32
return vqshrun_n_s32(a, 9);
- // CHECK: sqshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
+// CHECK: ret <2 x i32> [[VQSHRUN_N]]1
int32x2_t test_vqshrun_n_s64(int64x2_t a) {
- // CHECK-LABEL: test_vqshrun_n_s64
return vqshrun_n_s64(a, 19);
- // CHECK: sqshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N]]1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vqshrun_high_n_s16
return vqshrun_high_n_s16(a, b, 3);
- // CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N]]1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vqshrun_high_n_s32
return vqshrun_high_n_s32(a, b, 9);
- // CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N]]1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vqshrun_high_n_s64
return vqshrun_high_n_s64(a, b, 19);
- // CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
+// CHECK: ret <8 x i8> [[VRSHRN_N]]1
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
- // CHECK-LABEL: test_vrshrn_n_s16
return vrshrn_n_s16(a, 3);
- // CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
+// CHECK: ret <4 x i16> [[VRSHRN_N]]1
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
- // CHECK-LABEL: test_vrshrn_n_s32
return vrshrn_n_s32(a, 9);
- // CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
+// CHECK: ret <2 x i32> [[VRSHRN_N]]1
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
- // CHECK-LABEL: test_vrshrn_n_s64
return vrshrn_n_s64(a, 19);
- // CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
+// CHECK: ret <8 x i8> [[VRSHRN_N]]1
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
- // CHECK-LABEL: test_vrshrn_n_u16
return vrshrn_n_u16(a, 3);
- // CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
+// CHECK: ret <4 x i16> [[VRSHRN_N]]1
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
- // CHECK-LABEL: test_vrshrn_n_u32
return vrshrn_n_u32(a, 9);
- // CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
+// CHECK: ret <2 x i32> [[VRSHRN_N]]1
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
- // CHECK-LABEL: test_vrshrn_n_u64
return vrshrn_n_u64(a, 19);
- // CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N]]1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vrshrn_high_n_s16
return vrshrn_high_n_s16(a, b, 3);
- // CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N]]1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vrshrn_high_n_s32
return vrshrn_high_n_s32(a, b, 9);
- // CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N]]1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vrshrn_high_n_s64
return vrshrn_high_n_s64(a, b, 19);
- // CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N]]1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vrshrn_high_n_u16
return vrshrn_high_n_u16(a, b, 3);
- // CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N]]1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vrshrn_high_n_u32
return vrshrn_high_n_u32(a, b, 9);
- // CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N]]1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
- // CHECK-LABEL: test_vrshrn_high_n_u64
return vrshrn_high_n_u64(a, b, 19);
- // CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
+// CHECK: ret <8 x i8> [[VQRSHRUN_N]]1
int8x8_t test_vqrshrun_n_s16(int16x8_t a) {
- // CHECK-LABEL: test_vqrshrun_n_s16
return vqrshrun_n_s16(a, 3);
- // CHECK: sqrshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
+// CHECK: ret <4 x i16> [[VQRSHRUN_N]]1
int16x4_t test_vqrshrun_n_s32(int32x4_t a) {
- // CHECK-LABEL: test_vqrshrun_n_s32
return vqrshrun_n_s32(a, 9);
- // CHECK: sqrshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
+// CHECK: ret <2 x i32> [[VQRSHRUN_N]]1
int32x2_t test_vqrshrun_n_s64(int64x2_t a) {
- // CHECK-LABEL: test_vqrshrun_n_s64
return vqrshrun_n_s64(a, 19);
- // CHECK: sqrshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N]]1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vqrshrun_high_n_s16
return vqrshrun_high_n_s16(a, b, 3);
- // CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N]]1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vqrshrun_high_n_s32
return vqrshrun_high_n_s32(a, b, 9);
- // CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N]]1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vqrshrun_high_n_s64
return vqrshrun_high_n_s64(a, b, 19);
- // CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
+// CHECK: ret <8 x i8> [[VQSHRN_N]]1
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
- // CHECK-LABEL: test_vqshrn_n_s16
return vqshrn_n_s16(a, 3);
- // CHECK: sqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
+// CHECK: ret <4 x i16> [[VQSHRN_N]]1
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
- // CHECK-LABEL: test_vqshrn_n_s32
return vqshrn_n_s32(a, 9);
- // CHECK: sqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
+// CHECK: ret <2 x i32> [[VQSHRN_N]]1
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
- // CHECK-LABEL: test_vqshrn_n_s64
return vqshrn_n_s64(a, 19);
- // CHECK: sqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
+// CHECK: ret <8 x i8> [[VQSHRN_N]]1
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
- // CHECK-LABEL: test_vqshrn_n_u16
return vqshrn_n_u16(a, 3);
- // CHECK: uqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
+// CHECK: ret <4 x i16> [[VQSHRN_N]]1
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
- // CHECK-LABEL: test_vqshrn_n_u32
return vqshrn_n_u32(a, 9);
- // CHECK: uqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
+// CHECK: ret <2 x i32> [[VQSHRN_N]]1
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
- // CHECK-LABEL: test_vqshrn_n_u64
return vqshrn_n_u64(a, 19);
- // CHECK: uqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N]]1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vqshrn_high_n_s16
return vqshrn_high_n_s16(a, b, 3);
- // CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N]]1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vqshrn_high_n_s32
return vqshrn_high_n_s32(a, b, 9);
- // CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N]]1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vqshrn_high_n_s64
return vqshrn_high_n_s64(a, b, 19);
- // CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N]]1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vqshrn_high_n_u16
return vqshrn_high_n_u16(a, b, 3);
- // CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N]]1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vqshrn_high_n_u32
return vqshrn_high_n_u32(a, b, 9);
- // CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N]]1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
- // CHECK-LABEL: test_vqshrn_high_n_u64
return vqshrn_high_n_u64(a, b, 19);
- // CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
+// CHECK: ret <8 x i8> [[VQRSHRN_N]]1
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
- // CHECK-LABEL: test_vqrshrn_n_s16
return vqrshrn_n_s16(a, 3);
- // CHECK: sqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
+// CHECK: ret <4 x i16> [[VQRSHRN_N]]1
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
- // CHECK-LABEL: test_vqrshrn_n_s32
return vqrshrn_n_s32(a, 9);
- // CHECK: sqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
+// CHECK: ret <2 x i32> [[VQRSHRN_N]]1
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
- // CHECK-LABEL: test_vqrshrn_n_s64
return vqrshrn_n_s64(a, 19);
- // CHECK: sqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
+// CHECK: ret <8 x i8> [[VQRSHRN_N]]1
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
- // CHECK-LABEL: test_vqrshrn_n_u16
return vqrshrn_n_u16(a, 3);
- // CHECK: uqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
+// CHECK: ret <4 x i16> [[VQRSHRN_N]]1
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
- // CHECK-LABEL: test_vqrshrn_n_u32
return vqrshrn_n_u32(a, 9);
- // CHECK: uqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
+// CHECK: ret <2 x i32> [[VQRSHRN_N]]1
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
- // CHECK-LABEL: test_vqrshrn_n_u64
return vqrshrn_n_u64(a, 19);
- // CHECK: uqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N]]1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vqrshrn_high_n_s16
return vqrshrn_high_n_s16(a, b, 3);
- // CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N]]1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vqrshrn_high_n_s32
return vqrshrn_high_n_s32(a, b, 9);
- // CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N]]1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vqrshrn_high_n_s64
return vqrshrn_high_n_s64(a, b, 19);
- // CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N]]1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vqrshrn_high_n_u16
return vqrshrn_high_n_u16(a, b, 3);
- // CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
}
+// CHECK-LABEL: define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N]]1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vqrshrn_high_n_u32
return vqrshrn_high_n_u32(a, b, 9);
- // CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
}
+// CHECK-LABEL: define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N]]1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
- // CHECK-LABEL: test_vqrshrn_high_n_u64
return vqrshrn_high_n_u64(a, b, 19);
- // CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
}
+// CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
+// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
-// CHECK-LABEL: test_vshll_n_s8
return vshll_n_s8(a, 3);
-// CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
+// CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
-// CHECK-LABEL: test_vshll_n_s16
return vshll_n_s16(a, 9);
-// CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9
}
+// CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
+// CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
-// CHECK-LABEL: test_vshll_n_s32
return vshll_n_s32(a, 19);
-// CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19
}
+// CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 {
+// CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
+// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
-// CHECK-LABEL: test_vshll_n_u8
return vshll_n_u8(a, 3);
-// CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
+// CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
-// CHECK-LABEL: test_vshll_n_u16
return vshll_n_u16(a, 9);
-// CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9
}
+// CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
+// CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
-// CHECK-LABEL: test_vshll_n_u32
return vshll_n_u32(a, 19);
-// CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19
}
+// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
+// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_high_n_s8(int8x16_t a) {
-// CHECK-LABEL: test_vshll_high_n_s8
return vshll_high_n_s8(a, 3);
-// CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
+// CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_high_n_s16(int16x8_t a) {
-// CHECK-LABEL: test_vshll_high_n_s16
return vshll_high_n_s16(a, 9);
-// CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9
}
+// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
+// CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_high_n_s32(int32x4_t a) {
-// CHECK-LABEL: test_vshll_high_n_s32
return vshll_high_n_s32(a, 19);
-// CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19
}
+// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
+// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
-// CHECK-LABEL: test_vshll_high_n_u8
return vshll_high_n_u8(a, 3);
-// CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3
}
+// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
+// CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
-// CHECK-LABEL: test_vshll_high_n_u16
return vshll_high_n_u16(a, 9);
-// CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9
}
+// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 {
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
+// CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
-// CHECK-LABEL: test_vshll_high_n_u32
return vshll_high_n_u32(a, 19);
-// CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19
}
+// CHECK-LABEL: define <8 x i16> @test_vmovl_s8(<8 x i8> %a) #0 {
+// CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[VMOVL_I]]
int16x8_t test_vmovl_s8(int8x8_t a) {
-// CHECK-LABEL: test_vmovl_s8
return vmovl_s8(a);
-// CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0
}
+// CHECK-LABEL: define <4 x i32> @test_vmovl_s16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: ret <4 x i32> [[VMOVL_I]]
int32x4_t test_vmovl_s16(int16x4_t a) {
-// CHECK-LABEL: test_vmovl_s16
return vmovl_s16(a);
-// CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0
}
+// CHECK-LABEL: define <2 x i64> @test_vmovl_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: ret <2 x i64> [[VMOVL_I]]
int64x2_t test_vmovl_s32(int32x2_t a) {
-// CHECK-LABEL: test_vmovl_s32
return vmovl_s32(a);
-// CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0
}
+// CHECK-LABEL: define <8 x i16> @test_vmovl_u8(<8 x i8> %a) #0 {
+// CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
+// CHECK: ret <8 x i16> [[VMOVL_I]]
uint16x8_t test_vmovl_u8(uint8x8_t a) {
-// CHECK-LABEL: test_vmovl_u8
return vmovl_u8(a);
-// CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0
}
+// CHECK-LABEL: define <4 x i32> @test_vmovl_u16(<4 x i16> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: ret <4 x i32> [[VMOVL_I]]
uint32x4_t test_vmovl_u16(uint16x4_t a) {
-// CHECK-LABEL: test_vmovl_u16
return vmovl_u16(a);
-// CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0
}
+// CHECK-LABEL: define <2 x i64> @test_vmovl_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: ret <2 x i64> [[VMOVL_I]]
uint64x2_t test_vmovl_u32(uint32x2_t a) {
-// CHECK-LABEL: test_vmovl_u32
return vmovl_u32(a);
-// CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0
}
+// CHECK-LABEL: define <8 x i16> @test_vmovl_high_s8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vmovl_high_s8(int8x16_t a) {
-// CHECK-LABEL: test_vmovl_high_s8
return vmovl_high_s8(a);
-// CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0
}
+// CHECK-LABEL: define <4 x i32> @test_vmovl_high_s16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vmovl_high_s16(int16x8_t a) {
-// CHECK-LABEL: test_vmovl_high_s16
return vmovl_high_s16(a);
-// CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0
}
+// CHECK-LABEL: define <2 x i64> @test_vmovl_high_s32(<4 x i32> %a) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vmovl_high_s32(int32x4_t a) {
-// CHECK-LABEL: test_vmovl_high_s32
return vmovl_high_s32(a);
-// CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0
}
+// CHECK-LABEL: define <8 x i16> @test_vmovl_high_u8(<16 x i8> %a) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
-// CHECK-LABEL: test_vmovl_high_u8
return vmovl_high_u8(a);
-// CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0
}
+// CHECK-LABEL: define <4 x i32> @test_vmovl_high_u16(<8 x i16> %a) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
-// CHECK-LABEL: test_vmovl_high_u16
return vmovl_high_u16(a);
-// CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0
}
+// CHECK-LABEL: define <2 x i64> @test_vmovl_high_u32(<4 x i32> %a) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
-// CHECK-LABEL: test_vmovl_high_u32
return vmovl_high_u32(a);
-// CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0
}
+// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
+// CHECK: ret <2 x float> [[VCVT_N]]1
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
- // CHECK-LABEL: test_vcvt_n_f32_s32
return vcvt_n_f32_s32(a, 31);
- // CHECK: scvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
}
+// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
+// CHECK: ret <4 x float> [[VCVT_N]]1
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
- // CHECK-LABEL: test_vcvtq_n_f32_s32
return vcvtq_n_f32_s32(a, 31);
- // CHECK: scvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
}
+// CHECK-LABEL: define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
+// CHECK: ret <2 x double> [[VCVT_N]]1
float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
- // CHECK-LABEL: test_vcvtq_n_f64_s64
return vcvtq_n_f64_s64(a, 50);
- // CHECK: scvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
}
+// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
+// CHECK: ret <2 x float> [[VCVT_N]]1
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
- // CHECK-LABEL: test_vcvt_n_f32_u32
return vcvt_n_f32_u32(a, 31);
- // CHECK: ucvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
}
+// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
+// CHECK: ret <4 x float> [[VCVT_N]]1
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
- // CHECK-LABEL: test_vcvtq_n_f32_u32
return vcvtq_n_f32_u32(a, 31);
- // CHECK: ucvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
}
+// CHECK-LABEL: define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
+// CHECK: ret <2 x double> [[VCVT_N]]1
float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) {
- // CHECK-LABEL: test_vcvtq_n_f64_u64
return vcvtq_n_f64_u64(a, 50);
- // CHECK: ucvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
}
+// CHECK-LABEL: define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
+// CHECK: ret <2 x i32> [[VCVT_N]]1
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
- // CHECK-LABEL: test_vcvt_n_s32_f32
return vcvt_n_s32_f32(a, 31);
- // CHECK: fcvtzs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
}
+// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
+// CHECK: ret <4 x i32> [[VCVT_N]]1
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
- // CHECK-LABEL: test_vcvtq_n_s32_f32
return vcvtq_n_s32_f32(a, 31);
- // CHECK: fcvtzs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
}
+// CHECK-LABEL: define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
+// CHECK: ret <2 x i64> [[VCVT_N]]1
int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
- // CHECK-LABEL: test_vcvtq_n_s64_f64
return vcvtq_n_s64_f64(a, 50);
- // CHECK: fcvtzs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
}
+// CHECK-LABEL: define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
+// CHECK: ret <2 x i32> [[VCVT_N]]1
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
- // CHECK-LABEL: test_vcvt_n_u32_f32
return vcvt_n_u32_f32(a, 31);
- // CHECK: fcvtzu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
}
+// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
+// CHECK: ret <4 x i32> [[VCVT_N]]1
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
- // CHECK-LABEL: test_vcvtq_n_u32_f32
return vcvtq_n_u32_f32(a, 31);
- // CHECK: fcvtzu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
}
+// CHECK-LABEL: define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
+// CHECK: ret <2 x i64> [[VCVT_N]]1
uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
- // CHECK-LABEL: test_vcvtq_n_u64_f64
return vcvtq_n_u64_f64(a, 50);
- // CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
}
+// CHECK-LABEL: define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
+// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
- // CHECK-LABEL: test_vaddl_s8
return vaddl_s8(a, b);
- // CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
- // CHECK-LABEL: test_vaddl_s16
return vaddl_s16(a, b);
- // CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
- // CHECK-LABEL: test_vaddl_s32
return vaddl_s32(a, b);
- // CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
+// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
- // CHECK-LABEL: test_vaddl_u8
return vaddl_u8(a, b);
- // CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
- // CHECK-LABEL: test_vaddl_u16
return vaddl_u16(a, b);
- // CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
- // CHECK-LABEL: test_vaddl_u32
return vaddl_u32(a, b);
- // CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
- // CHECK-LABEL: test_vaddl_high_s8
return vaddl_high_s8(a, b);
- // CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vaddl_high_s16
return vaddl_high_s16(a, b);
- // CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]]
+// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vaddl_high_s32
return vaddl_high_s32(a, b);
- // CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
- // CHECK-LABEL: test_vaddl_high_u8
return vaddl_high_u8(a, b);
- // CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vaddl_high_u16
return vaddl_high_u16(a, b);
- // CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]]
+// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vaddl_high_u32
return vaddl_high_u32(a, b);
- // CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
- // CHECK-LABEL: test_vaddw_s8
return vaddw_s8(a, b);
- // CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
- // CHECK-LABEL: test_vaddw_s16
return vaddw_s16(a, b);
- // CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
- // CHECK-LABEL: test_vaddw_s32
return vaddw_s32(a, b);
- // CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
- // CHECK-LABEL: test_vaddw_u8
return vaddw_u8(a, b);
- // CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
- // CHECK-LABEL: test_vaddw_u16
return vaddw_u16(a, b);
- // CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
- // CHECK-LABEL: test_vaddw_u32
return vaddw_u32(a, b);
- // CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
- // CHECK-LABEL: test_vaddw_high_s8
return vaddw_high_s8(a, b);
- // CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
- // CHECK-LABEL: test_vaddw_high_s16
return vaddw_high_s16(a, b);
- // CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP2]]
+// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
- // CHECK-LABEL: test_vaddw_high_s32
return vaddw_high_s32(a, b);
- // CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
- // CHECK-LABEL: test_vaddw_high_u8
return vaddw_high_u8(a, b);
- // CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vaddw_high_u16
return vaddw_high_u16(a, b);
- // CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP2]]
+// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vaddw_high_u32
return vaddw_high_u32(a, b);
- // CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
+// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
- // CHECK-LABEL: test_vsubl_s8
return vsubl_s8(a, b);
- // CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
- // CHECK-LABEL: test_vsubl_s16
return vsubl_s16(a, b);
- // CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
- // CHECK-LABEL: test_vsubl_s32
return vsubl_s32(a, b);
- // CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
+// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
- // CHECK-LABEL: test_vsubl_u8
return vsubl_u8(a, b);
- // CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
- // CHECK-LABEL: test_vsubl_u16
return vsubl_u16(a, b);
- // CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
- // CHECK-LABEL: test_vsubl_u32
return vsubl_u32(a, b);
- // CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
- // CHECK-LABEL: test_vsubl_high_s8
return vsubl_high_s8(a, b);
- // CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
+// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vsubl_high_s16
return vsubl_high_s16(a, b);
- // CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
+// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vsubl_high_s32
return vsubl_high_s32(a, b);
- // CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
- // CHECK-LABEL: test_vsubl_high_u8
return vsubl_high_u8(a, b);
- // CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
+// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vsubl_high_u16
return vsubl_high_u16(a, b);
- // CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
+// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vsubl_high_u32
return vsubl_high_u32(a, b);
- // CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
+// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
- // CHECK-LABEL: test_vsubw_s8
return vsubw_s8(a, b);
- // CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
- // CHECK-LABEL: test_vsubw_s16
return vsubw_s16(a, b);
- // CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
+// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
- // CHECK-LABEL: test_vsubw_s32
return vsubw_s32(a, b);
- // CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
+// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
- // CHECK-LABEL: test_vsubw_u8
return vsubw_u8(a, b);
- // CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
- // CHECK-LABEL: test_vsubw_u16
return vsubw_u16(a, b);
- // CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
+// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
- // CHECK-LABEL: test_vsubw_u32
return vsubw_u32(a, b);
- // CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
+// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
- // CHECK-LABEL: test_vsubw_high_s8
return vsubw_high_s8(a, b);
- // CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP2]]
+// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
- // CHECK-LABEL: test_vsubw_high_s16
return vsubw_high_s16(a, b);
- // CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP2]]
+// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
- // CHECK-LABEL: test_vsubw_high_s32
return vsubw_high_s32(a, b);
- // CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
+// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
- // CHECK-LABEL: test_vsubw_high_u8
return vsubw_high_u8(a, b);
- // CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP2]]
+// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vsubw_high_u16
return vsubw_high_u16(a, b);
- // CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP2]]
+// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vsubw_high_u32
return vsubw_high_u32(a, b);
- // CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vaddhn_s16
return vaddhn_s16(a, b);
- // CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vaddhn_s32
return vaddhn_s32(a, b);
- // CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
+// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vaddhn_s64
return vaddhn_s64(a, b);
- // CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vaddhn_u16
return vaddhn_u16(a, b);
- // CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vaddhn_u32
return vaddhn_u32(a, b);
- // CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
+// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
- // CHECK-LABEL: test_vaddhn_u64
return vaddhn_u64(a, b);
- // CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vaddhn_high_s16
return vaddhn_high_s16(r, a, b);
- // CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vaddhn_high_s32
return vaddhn_high_s32(r, a, b);
- // CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
+// CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vaddhn_high_s64
return vaddhn_high_s64(r, a, b);
- // CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vaddhn_high_u16
return vaddhn_high_u16(r, a, b);
- // CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vaddhn_high_u32
return vaddhn_high_u32(r, a, b);
- // CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
+// CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
- // CHECK-LABEL: test_vaddhn_high_u64
return vaddhn_high_u64(r, a, b);
- // CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
+// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vraddhn_s16
return vraddhn_s16(a, b);
- // CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
+// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vraddhn_s32
return vraddhn_s32(a, b);
- // CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
+// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vraddhn_s64
return vraddhn_s64(a, b);
- // CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
+// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vraddhn_u16
return vraddhn_u16(a, b);
- // CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
+// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vraddhn_u32
return vraddhn_u32(a, b);
- // CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
+// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
- // CHECK-LABEL: test_vraddhn_u64
return vraddhn_u64(a, b);
- // CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]]) #4
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vraddhn_high_s16
return vraddhn_high_s16(r, a, b);
- // CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]]) #4
+// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vraddhn_high_s32
return vraddhn_high_s32(r, a, b);
- // CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]]) #4
+// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vraddhn_high_s64
return vraddhn_high_s64(r, a, b);
- // CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]]) #4
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vraddhn_high_u16
return vraddhn_high_u16(r, a, b);
- // CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]]) #4
+// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vraddhn_high_u32
return vraddhn_high_u32(r, a, b);
- // CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]]) #4
+// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
- // CHECK-LABEL: test_vraddhn_high_u64
return vraddhn_high_u64(r, a, b);
- // CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[VSUBHN2_I]]
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vsubhn_s16
return vsubhn_s16(a, b);
- // CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[VSUBHN2_I]]
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vsubhn_s32
return vsubhn_s32(a, b);
- // CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
+// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[VSUBHN2_I]]
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vsubhn_s64
return vsubhn_s64(a, b);
- // CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
+// CHECK: ret <8 x i8> [[VSUBHN2_I]]
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vsubhn_u16
return vsubhn_u16(a, b);
- // CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[VSUBHN2_I]]
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vsubhn_u32
return vsubhn_u32(a, b);
- // CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
+// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[VSUBHN2_I]]
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
- // CHECK-LABEL: test_vsubhn_u64
return vsubhn_u64(a, b);
- // CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vsubhn_high_s16
return vsubhn_high_s16(r, a, b);
- // CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vsubhn_high_s32
return vsubhn_high_s32(r, a, b);
- // CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
+// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vsubhn_high_s64
return vsubhn_high_s64(r, a, b);
- // CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vsubhn_high_u16
return vsubhn_high_u16(r, a, b);
- // CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vsubhn_high_u32
return vsubhn_high_u32(r, a, b);
- // CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
+// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
- // CHECK-LABEL: test_vsubhn_high_u64
return vsubhn_high_u64(r, a, b);
- // CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
+// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vrsubhn_s16
return vrsubhn_s16(a, b);
- // CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
+// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vrsubhn_s32
return vrsubhn_s32(a, b);
- // CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
+// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vrsubhn_s64
return vrsubhn_s64(a, b);
- // CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
+// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vrsubhn_u16
return vrsubhn_u16(a, b);
- // CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
+// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vrsubhn_u32
return vrsubhn_u32(a, b);
- // CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
+// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
- // CHECK-LABEL: test_vrsubhn_u64
return vrsubhn_u64(a, b);
- // CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vrsubhn_high_s16
return vrsubhn_high_s16(r, a, b);
- // CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4
+// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vrsubhn_high_s32
return vrsubhn_high_s32(r, a, b);
- // CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4
+// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
- // CHECK-LABEL: test_vrsubhn_high_s64
return vrsubhn_high_s64(r, a, b);
- // CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vrsubhn_high_u16
return vrsubhn_high_u16(r, a, b);
- // CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4
+// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vrsubhn_high_u32
return vrsubhn_high_u32(r, a, b);
- // CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4
+// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32>
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
- // CHECK-LABEL: test_vrsubhn_high_u64
return vrsubhn_high_u64(r, a, b);
- // CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
- // CHECK-LABEL: test_vabdl_s8
return vabdl_s8(a, b);
- // CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
- // CHECK-LABEL: test_vabdl_s16
return vabdl_s16(a, b);
- // CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
- // CHECK-LABEL: test_vabdl_s32
return vabdl_s32(a, b);
- // CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
- // CHECK-LABEL: test_vabdl_u8
return vabdl_u8(a, b);
- // CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
- // CHECK-LABEL: test_vabdl_u16
return vabdl_u16(a, b);
- // CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
- // CHECK-LABEL: test_vabdl_u32
return vabdl_u32(a, b);
- // CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
- // CHECK-LABEL: test_vabal_s8
return vabal_s8(a, b, c);
- // CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
- // CHECK-LABEL: test_vabal_s16
return vabal_s16(a, b, c);
- // CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
- // CHECK-LABEL: test_vabal_s32
return vabal_s32(a, b, c);
- // CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
- // CHECK-LABEL: test_vabal_u8
return vabal_u8(a, b, c);
- // CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
- // CHECK-LABEL: test_vabal_u16
return vabal_u16(a, b, c);
- // CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
- // CHECK-LABEL: test_vabal_u32
return vabal_u32(a, b, c);
- // CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
- // CHECK-LABEL: test_vabdl_high_s8
return vabdl_high_s8(a, b);
- // CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vabdl_high_s16
return vabdl_high_s16(a, b);
- // CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vabdl_high_s32
return vabdl_high_s32(a, b);
- // CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
+// CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
- // CHECK-LABEL: test_vabdl_high_u8
return vabdl_high_u8(a, b);
- // CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vabdl_high_u16
return vabdl_high_u16(a, b);
- // CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vabdl_high_u32
return vabdl_high_u32(a, b);
- // CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
+// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
+// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
- // CHECK-LABEL: test_vabal_high_s8
return vabal_high_s8(a, b, c);
- // CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
- // CHECK-LABEL: test_vabal_high_s16
return vabal_high_s16(a, b, c);
- // CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
- // CHECK-LABEL: test_vabal_high_s32
return vabal_high_s32(a, b, c);
- // CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
+// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
+// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
- // CHECK-LABEL: test_vabal_high_u8
return vabal_high_u8(a, b, c);
- // CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
- // CHECK-LABEL: test_vabal_high_u16
return vabal_high_u16(a, b, c);
- // CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]]) #4
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
- // CHECK-LABEL: test_vabal_high_u32
return vabal_high_u32(a, b, c);
- // CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i16> [[VMULL_I]]
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
- // CHECK-LABEL: test_vmull_s8
return vmull_s8(a, b);
- // CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
+// CHECK: ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
- // CHECK-LABEL: test_vmull_s16
return vmull_s16(a, b);
- // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
+// CHECK: ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
- // CHECK-LABEL: test_vmull_s32
return vmull_s32(a, b);
- // CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i16> [[VMULL_I]]
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
- // CHECK-LABEL: test_vmull_u8
return vmull_u8(a, b);
- // CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
+// CHECK: ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
- // CHECK-LABEL: test_vmull_u16
return vmull_u16(a, b);
- // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
+// CHECK: ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
- // CHECK-LABEL: test_vmull_u32
return vmull_u32(a, b);
- // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
+// CHECK: ret <8 x i16> [[VMULL_I_I]]
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
- // CHECK-LABEL: test_vmull_high_s8
return vmull_high_s8(a, b);
- // CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
+// CHECK: ret <4 x i32> [[VMULL2_I_I]]
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vmull_high_s16
return vmull_high_s16(a, b);
- // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
+// CHECK: ret <2 x i64> [[VMULL2_I_I]]
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vmull_high_s32
return vmull_high_s32(a, b);
- // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
+// CHECK: ret <8 x i16> [[VMULL_I_I]]
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
- // CHECK-LABEL: test_vmull_high_u8
return vmull_high_u8(a, b);
- // CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
+// CHECK: ret <4 x i32> [[VMULL2_I_I]]
uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
- // CHECK-LABEL: test_vmull_high_u16
return vmull_high_u16(a, b);
- // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
+// CHECK: ret <2 x i64> [[VMULL2_I_I]]
uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
- // CHECK-LABEL: test_vmull_high_u32
return vmull_high_u32(a, b);
- // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
- // CHECK-LABEL: test_vmlal_s8
return vmlal_s8(a, b, c);
- // CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
- // CHECK-LABEL: test_vmlal_s16
return vmlal_s16(a, b, c);
- // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
- // CHECK-LABEL: test_vmlal_s32
return vmlal_s32(a, b, c);
- // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
+// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
- // CHECK-LABEL: test_vmlal_u8
return vmlal_u8(a, b, c);
- // CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
- // CHECK-LABEL: test_vmlal_u16
return vmlal_u16(a, b, c);
- // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
+// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
- // CHECK-LABEL: test_vmlal_u32
return vmlal_u32(a, b, c);
- // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
+// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
- // CHECK-LABEL: test_vmlal_high_s8
return vmlal_high_s8(a, b, c);
- // CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
+// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
- // CHECK-LABEL: test_vmlal_high_s16
return vmlal_high_s16(a, b, c);
- // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
+// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
- // CHECK-LABEL: test_vmlal_high_s32
return vmlal_high_s32(a, b, c);
- // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
+// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
+// CHECK: ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
- // CHECK-LABEL: test_vmlal_high_u8
return vmlal_high_u8(a, b, c);
- // CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
+// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
+// CHECK: ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
- // CHECK-LABEL: test_vmlal_high_u16
return vmlal_high_u16(a, b, c);
- // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
+// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
+// CHECK: ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
- // CHECK-LABEL: test_vmlal_high_u32
return vmlal_high_u32(a, b, c);
- // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
+// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
- // CHECK-LABEL: test_vmlsl_s8
return vmlsl_s8(a, b, c);
- // CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
- // CHECK-LABEL: test_vmlsl_s16
return vmlsl_s16(a, b, c);
- // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
+// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
- // CHECK-LABEL: test_vmlsl_s32
return vmlsl_s32(a, b, c);
- // CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
+// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
+// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
- // CHECK-LABEL: test_vmlsl_u8
return vmlsl_u8(a, b, c);
- // CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
+// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
+// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
- // CHECK-LABEL: test_vmlsl_u16
return vmlsl_u16(a, b, c);
- // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
+// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
+// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
- // CHECK-LABEL: test_vmlsl_u32
return vmlsl_u32(a, b, c);
- // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
+// CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
+// CHECK: ret <8 x i16> [[SUB_I_I]]
int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
- // CHECK-LABEL: test_vmlsl_high_s8
return vmlsl_high_s8(a, b, c);
- // CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
+// CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
+// CHECK: ret <4 x i32> [[SUB_I_I]]
int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
- // CHECK-LABEL: test_vmlsl_high_s16
return vmlsl_high_s16(a, b, c);
- // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
+// CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
+// CHECK: ret <2 x i64> [[SUB_I_I]]
int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
- // CHECK-LABEL: test_vmlsl_high_s32
return vmlsl_high_s32(a, b, c);
- // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
+// CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
+// CHECK: ret <8 x i16> [[SUB_I_I]]
uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
- // CHECK-LABEL: test_vmlsl_high_u8
return vmlsl_high_u8(a, b, c);
- // CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
+// CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
+// CHECK: ret <4 x i32> [[SUB_I_I]]
uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
- // CHECK-LABEL: test_vmlsl_high_u16
return vmlsl_high_u16(a, b, c);
- // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
+// CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
+// CHECK: ret <2 x i64> [[SUB_I_I]]
uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
- // CHECK-LABEL: test_vmlsl_high_u32
return vmlsl_high_u32(a, b, c);
- // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4
+// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
- // CHECK-LABEL: test_vqdmull_s16
return vqdmull_s16(a, b);
- // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4
+// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
- // CHECK-LABEL: test_vqdmull_s32
return vqdmull_s32(a, b);
- // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
+// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
+// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
- // CHECK-LABEL: test_vqdmlal_s16
return vqdmlal_s16(a, b, c);
- // CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
+// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
+// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
- // CHECK-LABEL: test_vqdmlal_s32
return vqdmlal_s32(a, b, c);
- // CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
+// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
+// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
- // CHECK-LABEL: test_vqdmlsl_s16
return vqdmlsl_s16(a, b, c);
- // CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
}
+// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
+// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
+// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
- // CHECK-LABEL: test_vqdmlsl_s32
return vqdmlsl_s32(a, b, c);
- // CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
}
+// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I_I]], <4 x i16> [[VQDMULL_V1_I_I]]) #4
+// CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <4 x i32>
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
- // CHECK-LABEL: test_vqdmull_high_s16
return vqdmull_high_s16(a, b);
- // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I_I]], <2 x i32> [[VQDMULL_V1_I_I]]) #4
+// CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <2 x i64>
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
- // CHECK-LABEL: test_vqdmull_high_s32
return vqdmull_high_s32(a, b);
- // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]]) #4
+// CHECK: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]]) #4
+// CHECK: ret <4 x i32> [[VQDMLAL_V3_I_I]]
int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
- // CHECK-LABEL: test_vqdmlal_high_s16
return vqdmlal_high_s16(a, b, c);
- // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]]) #4
+// CHECK: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]]) #4
+// CHECK: ret <2 x i64> [[VQDMLAL_V3_I_I]]
int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
- // CHECK-LABEL: test_vqdmlal_high_s32
return vqdmlal_high_s32(a, b, c);
- // CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]]) #4
+// CHECK: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]]) #4
+// CHECK: ret <4 x i32> [[VQDMLSL_V3_I_I]]
int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
- // CHECK-LABEL: test_vqdmlsl_high_s16
return vqdmlsl_high_s16(a, b, c);
- // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
}
+// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]]) #4
+// CHECK: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]]) #4
+// CHECK: ret <2 x i64> [[VQDMLSL_V3_I_I]]
int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
- // CHECK-LABEL: test_vqdmlsl_high_s32
return vqdmlsl_high_s32(a, b, c);
- // CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
+// CHECK-LABEL: define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) #0 {
+// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
+// CHECK: ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
- // CHECK-LABEL: test_vmull_p8
return vmull_p8(a, b);
- // CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
+// CHECK-LABEL: define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) #0 {
+// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
+// CHECK: ret <8 x i16> [[VMULL_I_I]]
poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
- // CHECK-LABEL: test_vmull_high_p8
return vmull_high_p8(a, b);
- // CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
+// CHECK-LABEL: define i64 @test_vaddd_s64(i64 %a, i64 %b) #0 {
+// CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
+// CHECK: ret i64 [[VADDD_I]]
int64_t test_vaddd_s64(int64_t a, int64_t b) {
-// CHECK-LABEL: test_vaddd_s64
return vaddd_s64(a, b);
-// CHECK: add {{[xd][0-9]+}}, {{[xd][0-9]+}}, {{[xd][0-9]+}}
}
+// CHECK-LABEL: define i64 @test_vaddd_u64(i64 %a, i64 %b) #0 {
+// CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
+// CHECK: ret i64 [[VADDD_I]]
uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
-// CHECK-LABEL: test_vaddd_u64
return vaddd_u64(a, b);
-// CHECK: add {{[xd][0-9]+}}, {{[xd][0-9]+}}, {{[xd][0-9]+}}
}
+// CHECK-LABEL: define i64 @test_vsubd_s64(i64 %a, i64 %b) #0 {
+// CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
+// CHECK: ret i64 [[VSUBD_I]]
int64_t test_vsubd_s64(int64_t a, int64_t b) {
-// CHECK-LABEL: test_vsubd_s64
return vsubd_s64(a, b);
-// CHECK: sub {{[xd][0-9]+}}, {{[xd][0-9]+}}, {{[xd][0-9]+}}
}
+// CHECK-LABEL: define i64 @test_vsubd_u64(i64 %a, i64 %b) #0 {
+// CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
+// CHECK: ret i64 [[VSUBD_I]]
uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
-// CHECK-LABEL: test_vsubd_u64
return vsubd_u64(a, b);
-// CHECK: sub {{[xd][0-9]+}}, {{[xd][0-9]+}}, {{[xd][0-9]+}}
}
+// CHECK-LABEL: define i8 @test_vqaddb_s8(i8 %a, i8 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
+// CHECK: [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
+// CHECK: ret i8 [[TMP2]]
int8_t test_vqaddb_s8(int8_t a, int8_t b) {
-// CHECK-LABEL: test_vqaddb_s8
return vqaddb_s8(a, b);
-// CHECK: sqadd {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}
}
+// CHECK-LABEL: define i16 @test_vqaddh_s16(i16 %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
+// CHECK: ret i16 [[TMP2]]
int16_t test_vqaddh_s16(int16_t a, int16_t b) {
-// CHECK-LABEL: test_vqaddh_s16
return vqaddh_s16(a, b);
-// CHECK: sqadd {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
}
+// CHECK-LABEL: define i32 @test_vqadds_s32(i32 %a, i32 %b) #0 {
+// CHECK: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b) #4
+// CHECK: ret i32 [[VQADDS_S32_I]]
int32_t test_vqadds_s32(int32_t a, int32_t b) {
-// CHECK-LABEL: test_vqadds_s32
return vqadds_s32(a, b);
-// CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
}
+// CHECK-LABEL: define i64 @test_vqaddd_s64(i64 %a, i64 %b) #0 {
+// CHECK: [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b) #4
+// CHECK: ret i64 [[VQADDD_S64_I]]
int64_t test_vqaddd_s64(int64_t a, int64_t b) {
-// CHECK-LABEL: test_vqaddd_s64
return vqaddd_s64(a, b);
-// CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define i8 @test_vqaddb_u8(i8 %a, i8 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
+// CHECK: [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
+// CHECK: ret i8 [[TMP2]]
uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
-// CHECK-LABEL: test_vqaddb_u8
return vqaddb_u8(a, b);
-// CHECK: uqadd {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}
}
+// CHECK-LABEL: define i16 @test_vqaddh_u16(i16 %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK: [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
+// CHECK: ret i16 [[TMP2]]
uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
-// CHECK-LABEL: test_vqaddh_u16
return vqaddh_u16(a, b);
-// CHECK: uqadd {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
}
+// CHECK-LABEL: define i32 @test_vqadds_u32(i32 %a, i32 %b) #0 {
+// CHECK: [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b) #4
+// CHECK: ret i32 [[VQADDS_U32_I]]
uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
-// CHECK-LABEL: test_vqadds_u32
return vqadds_u32(a, b);
-// CHECK: uqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
}
+// CHECK-LABEL: define i64 @test_vqaddd_u64(i64 %a, i64 %b) #0 {
+// CHECK: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b) #4
+// CHECK: ret i64 [[VQADDD_U64_I]]
uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
-// CHECK-LABEL: test_vqaddd_u64
return vqaddd_u64(a, b);
-// CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define i8 @test_vqsubb_s8(i8 %a, i8 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
+// CHECK: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
+// CHECK: ret i8 [[TMP2]]
int8_t test_vqsubb_s8(int8_t a, int8_t b) {
-// CHECK-LABEL: test_vqsubb_s8
return vqsubb_s8(a, b);
-// CHECK: sqsub {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}
}
+// CHECK-LABEL: define i16 @test_vqsubh_s16(i16 %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
+// CHECK: ret i16 [[TMP2]]
int16_t test_vqsubh_s16(int16_t a, int16_t b) {
-// CHECK-LABEL: test_vqsubh_s16
return vqsubh_s16(a, b);
-// CHECK: sqsub {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
}
+// CHECK-LABEL: define i32 @test_vqsubs_s32(i32 %a, i32 %b) #0 {
+// CHECK: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b) #4
+// CHECK: ret i32 [[VQSUBS_S32_I]]
int32_t test_vqsubs_s32(int32_t a, int32_t b) {
- // CHECK-LABEL: test_vqsubs_s32
return vqsubs_s32(a, b);
-// CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
}
+// CHECK-LABEL: define i64 @test_vqsubd_s64(i64 %a, i64 %b) #0 {
+// CHECK: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b) #4
+// CHECK: ret i64 [[VQSUBD_S64_I]]
int64_t test_vqsubd_s64(int64_t a, int64_t b) {
-// CHECK-LABEL: test_vqsubd_s64
return vqsubd_s64(a, b);
-// CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define i8 @test_vqsubb_u8(i8 %a, i8 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
+// CHECK: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
+// CHECK: ret i8 [[TMP2]]
uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
-// CHECK-LABEL: test_vqsubb_u8
return vqsubb_u8(a, b);
-// CHECK: uqsub {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}
}
+// CHECK-LABEL: define i16 @test_vqsubh_u16(i16 %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
+// CHECK: ret i16 [[TMP2]]
uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
-// CHECK-LABEL: test_vqsubh_u16
return vqsubh_u16(a, b);
-// CHECK: uqsub {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
}
+// CHECK-LABEL: define i32 @test_vqsubs_u32(i32 %a, i32 %b) #0 {
+// CHECK: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b) #4
+// CHECK: ret i32 [[VQSUBS_U32_I]]
uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
-// CHECK-LABEL: test_vqsubs_u32
return vqsubs_u32(a, b);
-// CHECK: uqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
}
+// CHECK-LABEL: define i64 @test_vqsubd_u64(i64 %a, i64 %b) #0 {
+// CHECK: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b) #4
+// CHECK: ret i64 [[VQSUBD_U64_I]]
uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
-// CHECK-LABEL: test_vqsubd_u64
return vqsubd_u64(a, b);
-// CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define i64 @test_vshld_s64(i64 %a, i64 %b) #0 {
+// CHECK: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b) #4
+// CHECK: ret i64 [[VSHLD_S64_I]]
int64_t test_vshld_s64(int64_t a, int64_t b) {
-// CHECK-LABEL: test_vshld_s64
return vshld_s64(a, b);
-// CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define i64 @test_vshld_u64(i64 %a, i64 %b) #0 {
+// CHECK: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b) #4
+// CHECK: ret i64 [[VSHLD_U64_I]]
uint64_t test_vshld_u64(uint64_t a, uint64_t b) {
-// CHECK-LABEL: test_vshld_u64
return vshld_u64(a, b);
-// CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
-// CHECK-LABEL: test_vqshlb_s8
+// CHECK-LABEL: define i8 @test_vqshlb_s8(i8 %a, i8 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
+// CHECK: [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
+// CHECK: ret i8 [[TMP2]]
int8_t test_vqshlb_s8(int8_t a, int8_t b) {
return vqshlb_s8(a, b);
-// CHECK: sqshl {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}
}
-// CHECK-LABEL: test_vqshlh_s16
+// CHECK-LABEL: define i16 @test_vqshlh_s16(i16 %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
+// CHECK: ret i16 [[TMP2]]
int16_t test_vqshlh_s16(int16_t a, int16_t b) {
return vqshlh_s16(a, b);
-// CHECK: sqshl {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
}
-// CHECK-LABEL: test_vqshls_s32
+// CHECK-LABEL: define i32 @test_vqshls_s32(i32 %a, i32 %b) #0 {
+// CHECK: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b) #4
+// CHECK: ret i32 [[VQSHLS_S32_I]]
int32_t test_vqshls_s32(int32_t a, int32_t b) {
return vqshls_s32(a, b);
-// CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
}
-// CHECK-LABEL: test_vqshld_s64
+// CHECK-LABEL: define i64 @test_vqshld_s64(i64 %a, i64 %b) #0 {
+// CHECK: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b) #4
+// CHECK: ret i64 [[VQSHLD_S64_I]]
int64_t test_vqshld_s64(int64_t a, int64_t b) {
return vqshld_s64(a, b);
-// CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
-// CHECK-LABEL: test_vqshlb_u8
+// CHECK-LABEL: define i8 @test_vqshlb_u8(i8 %a, i8 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
+// CHECK: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
+// CHECK: ret i8 [[TMP2]]
uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
return vqshlb_u8(a, b);
-// CHECK: uqshl {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}
}
-// CHECK-LABEL: test_vqshlh_u16
+// CHECK-LABEL: define i16 @test_vqshlh_u16(i16 %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK: [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
+// CHECK: ret i16 [[TMP2]]
uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) {
return vqshlh_u16(a, b);
-// CHECK: uqshl {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
}
-// CHECK-LABEL: test_vqshls_u32
+// CHECK-LABEL: define i32 @test_vqshls_u32(i32 %a, i32 %b) #0 {
+// CHECK: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b) #4
+// CHECK: ret i32 [[VQSHLS_U32_I]]
uint32_t test_vqshls_u32(uint32_t a, uint32_t b) {
return vqshls_u32(a, b);
-// CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
}
-// CHECK-LABEL: test_vqshld_u64
+// CHECK-LABEL: define i64 @test_vqshld_u64(i64 %a, i64 %b) #0 {
+// CHECK: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b) #4
+// CHECK: ret i64 [[VQSHLD_U64_I]]
uint64_t test_vqshld_u64(uint64_t a, uint64_t b) {
return vqshld_u64(a, b);
-// CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
-// CHECK-LABEL: test_vrshld_s64
+// CHECK-LABEL: define i64 @test_vrshld_s64(i64 %a, i64 %b) #0 {
+// CHECK: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b) #4
+// CHECK: ret i64 [[VRSHLD_S64_I]]
int64_t test_vrshld_s64(int64_t a, int64_t b) {
return vrshld_s64(a, b);
-// CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
-// CHECK-LABEL: test_vrshld_u64
+// CHECK-LABEL: define i64 @test_vrshld_u64(i64 %a, i64 %b) #0 {
+// CHECK: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b) #4
+// CHECK: ret i64 [[VRSHLD_U64_I]]
uint64_t test_vrshld_u64(uint64_t a, uint64_t b) {
return vrshld_u64(a, b);
-// CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
-// CHECK-LABEL: test_vqrshlb_s8
+// CHECK-LABEL: define i8 @test_vqrshlb_s8(i8 %a, i8 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
+// CHECK: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
+// CHECK: ret i8 [[TMP2]]
int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
return vqrshlb_s8(a, b);
-// CHECK: sqrshl {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}
}
-// CHECK-LABEL: test_vqrshlh_s16
+// CHECK-LABEL: define i16 @test_vqrshlh_s16(i16 %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
+// CHECK: ret i16 [[TMP2]]
int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
return vqrshlh_s16(a, b);
-// CHECK: sqrshl {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
}
-// CHECK-LABEL: test_vqrshls_s32
+// CHECK-LABEL: define i32 @test_vqrshls_s32(i32 %a, i32 %b) #0 {
+// CHECK: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b) #4
+// CHECK: ret i32 [[VQRSHLS_S32_I]]
int32_t test_vqrshls_s32(int32_t a, int32_t b) {
return vqrshls_s32(a, b);
-// CHECK: sqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
}
-// CHECK-LABEL: test_vqrshld_s64
+// CHECK-LABEL: define i64 @test_vqrshld_s64(i64 %a, i64 %b) #0 {
+// CHECK: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b) #4
+// CHECK: ret i64 [[VQRSHLD_S64_I]]
int64_t test_vqrshld_s64(int64_t a, int64_t b) {
return vqrshld_s64(a, b);
-// CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
-// CHECK-LABEL: test_vqrshlb_u8
+// CHECK-LABEL: define i8 @test_vqrshlb_u8(i8 %a, i8 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
+// CHECK: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
+// CHECK: ret i8 [[TMP2]]
uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
return vqrshlb_u8(a, b);
-// CHECK: uqrshl {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}, {{b[0-9]+|v[0-9]+.8b}}
}
-// CHECK-LABEL: test_vqrshlh_u16
+// CHECK-LABEL: define i16 @test_vqrshlh_u16(i16 %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
+// CHECK: ret i16 [[TMP2]]
uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) {
return vqrshlh_u16(a, b);
-// CHECK: uqrshl {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
}
-// CHECK-LABEL: test_vqrshls_u32
+// CHECK-LABEL: define i32 @test_vqrshls_u32(i32 %a, i32 %b) #0 {
+// CHECK: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b) #4
+// CHECK: ret i32 [[VQRSHLS_U32_I]]
uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) {
return vqrshls_u32(a, b);
-// CHECK: uqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
}
-// CHECK-LABEL: test_vqrshld_u64
+// CHECK-LABEL: define i64 @test_vqrshld_u64(i64 %a, i64 %b) #0 {
+// CHECK: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b) #4
+// CHECK: ret i64 [[VQRSHLD_U64_I]]
uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) {
return vqrshld_u64(a, b);
-// CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
-// CHECK-LABEL: test_vpaddd_s64
+// CHECK-LABEL: define i64 @test_vpaddd_s64(<2 x i64> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
+// CHECK: ret i64 [[VPADDD_S64_I]]
int64_t test_vpaddd_s64(int64x2_t a) {
return vpaddd_s64(a);
-// CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
}
-// CHECK-LABEL: test_vpadds_f32
+// CHECK-LABEL: define float @test_vpadds_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[LANE0_I:%.*]] = extractelement <2 x float> [[TMP1]], i64 0
+// CHECK: [[LANE1_I:%.*]] = extractelement <2 x float> [[TMP1]], i64 1
+// CHECK: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
+// CHECK: ret float [[VPADDD_I]]
float32_t test_vpadds_f32(float32x2_t a) {
return vpadds_f32(a);
-// CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s
}
-// CHECK-LABEL: test_vpaddd_f64
+// CHECK-LABEL: define double @test_vpaddd_f64(<2 x double> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[LANE0_I:%.*]] = extractelement <2 x double> [[TMP1]], i64 0
+// CHECK: [[LANE1_I:%.*]] = extractelement <2 x double> [[TMP1]], i64 1
+// CHECK: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
+// CHECK: ret double [[VPADDD_I]]
float64_t test_vpaddd_f64(float64x2_t a) {
return vpaddd_f64(a);
-// CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d
}
-// CHECK-LABEL: test_vpmaxnms_f32
+// CHECK-LABEL: define float @test_vpmaxnms_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
+// CHECK: ret float [[VPMAXNMS_F32_I]]
float32_t test_vpmaxnms_f32(float32x2_t a) {
return vpmaxnms_f32(a);
-// CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
}
-// CHECK-LABEL: test_vpmaxnmqd_f64
+// CHECK-LABEL: define double @test_vpmaxnmqd_f64(<2 x double> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
+// CHECK: ret double [[VPMAXNMQD_F64_I]]
float64_t test_vpmaxnmqd_f64(float64x2_t a) {
return vpmaxnmqd_f64(a);
-// CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
}
-// CHECK-LABEL: test_vpmaxs_f32
+// CHECK-LABEL: define float @test_vpmaxs_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[TMP1]]) #4
+// CHECK: ret float [[VPMAXS_F32_I]]
float32_t test_vpmaxs_f32(float32x2_t a) {
return vpmaxs_f32(a);
-// CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s
}
-// CHECK-LABEL: test_vpmaxqd_f64
+// CHECK-LABEL: define double @test_vpmaxqd_f64(<2 x double> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[TMP1]]) #4
+// CHECK: ret double [[VPMAXQD_F64_I]]
float64_t test_vpmaxqd_f64(float64x2_t a) {
return vpmaxqd_f64(a);
-// CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d
}
-// CHECK-LABEL: test_vpminnms_f32
+// CHECK-LABEL: define float @test_vpminnms_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
+// CHECK: ret float [[VPMINNMS_F32_I]]
float32_t test_vpminnms_f32(float32x2_t a) {
return vpminnms_f32(a);
-// CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
}
-// CHECK-LABEL: test_vpminnmqd_f64
+// CHECK-LABEL: define double @test_vpminnmqd_f64(<2 x double> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
+// CHECK: ret double [[VPMINNMQD_F64_I]]
float64_t test_vpminnmqd_f64(float64x2_t a) {
return vpminnmqd_f64(a);
-// CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
}
-// CHECK-LABEL: test_vpmins_f32
+// CHECK-LABEL: define float @test_vpmins_f32(<2 x float> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[TMP1]]) #4
+// CHECK: ret float [[VPMINS_F32_I]]
float32_t test_vpmins_f32(float32x2_t a) {
return vpmins_f32(a);
-// CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s
}
-// CHECK-LABEL: test_vpminqd_f64
+// CHECK-LABEL: define double @test_vpminqd_f64(<2 x double> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[TMP1]]) #4
+// CHECK: ret double [[VPMINQD_F64_I]]
float64_t test_vpminqd_f64(float64x2_t a) {
return vpminqd_f64(a);
-// CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d
}
+// CHECK-LABEL: define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
+// CHECK: ret i16 [[TMP2]]
int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
-// CHECK-LABEL: test_vqdmulhh_s16
return vqdmulhh_s16(a, b);
-// CHECK: sqdmulh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
}
+// CHECK-LABEL: define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) #0 {
+// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b) #4
+// CHECK: ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
-// CHECK-LABEL: test_vqdmulhs_s32
return vqdmulhs_s32(a, b);
-// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
}
+// CHECK-LABEL: define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) #0 {
+// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
+// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
+// CHECK: ret i16 [[TMP2]]
int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
-// CHECK-LABEL: test_vqrdmulhh_s16
return vqrdmulhh_s16(a, b);
-// CHECK: sqrdmulh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
}
+// CHECK-LABEL: define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) #0 {
+// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b) #4
+// CHECK: ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
-// CHECK-LABEL: test_vqrdmulhs_s32
return vqrdmulhs_s32(a, b);
-// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
}
+// CHECK-LABEL: define float @test_vmulxs_f32(float %a, float %b) #0 {
+// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) #4
+// CHECK: ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_f32(float32_t a, float32_t b) {
-// CHECK-LABEL: test_vmulxs_f32
return vmulxs_f32(a, b);
-// CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
}
+// CHECK-LABEL: define double @test_vmulxd_f64(double %a, double %b) #0 {
+// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) #4
+// CHECK: ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_f64(float64_t a, float64_t b) {
-// CHECK-LABEL: test_vmulxd_f64
return vmulxd_f64(a, b);
-// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define <1 x double> @test_vmulx_f64(<1 x double> %a, <1 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> [[VMULX_I]], <1 x double> [[VMULX1_I]]) #4
+// CHECK: ret <1 x double> [[VMULX2_I]]
float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
-// CHECK-LABEL: test_vmulx_f64
return vmulx_f64(a, b);
-// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define float @test_vrecpss_f32(float %a, float %b) #0 {
+// CHECK: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b) #4
+// CHECK: ret float [[VRECPS_I]]
float32_t test_vrecpss_f32(float32_t a, float32_t b) {
-// CHECK-LABEL: test_vrecpss_f32
return vrecpss_f32(a, b);
-// CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
}
+// CHECK-LABEL: define double @test_vrecpsd_f64(double %a, double %b) #0 {
+// CHECK: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b) #4
+// CHECK: ret double [[VRECPS_I]]
float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
-// CHECK-LABEL: test_vrecpsd_f64
return vrecpsd_f64(a, b);
-// CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define float @test_vrsqrtss_f32(float %a, float %b) #0 {
+// CHECK: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) #4
+// CHECK: ret float [[VRSQRTSS_F32_I]]
float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
-// CHECK-LABEL: test_vrsqrtss_f32
return vrsqrtss_f32(a, b);
-// CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
}
+// CHECK-LABEL: define double @test_vrsqrtsd_f64(double %a, double %b) #0 {
+// CHECK: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) #4
+// CHECK: ret double [[VRSQRTSD_F64_I]]
float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
-// CHECK-LABEL: test_vrsqrtsd_f64
return vrsqrtsd_f64(a, b);
-// CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
}
+// CHECK-LABEL: define float @test_vcvts_f32_s32(i32 %a) #0 {
+// CHECK: [[TMP0:%.*]] = sitofp i32 %a to float
+// CHECK: ret float [[TMP0]]
float32_t test_vcvts_f32_s32(int32_t a) {
-// CHECK-LABEL: test_vcvts_f32_s32
-// CHECK: scvtf {{s[0-9]+}}, {{[ws][0-9]+}}
return vcvts_f32_s32(a);
}
+// CHECK-LABEL: define double @test_vcvtd_f64_s64(i64 %a) #0 {
+// CHECK: [[TMP0:%.*]] = sitofp i64 %a to double
+// CHECK: ret double [[TMP0]]
float64_t test_vcvtd_f64_s64(int64_t a) {
-// CHECK-LABEL: test_vcvtd_f64_s64
-// CHECK: scvtf {{d[0-9]+}}, {{[dx][0-9]+}}
return vcvtd_f64_s64(a);
}
+// CHECK-LABEL: define float @test_vcvts_f32_u32(i32 %a) #0 {
+// CHECK: [[TMP0:%.*]] = uitofp i32 %a to float
+// CHECK: ret float [[TMP0]]
float32_t test_vcvts_f32_u32(uint32_t a) {
-// CHECK-LABEL: test_vcvts_f32_u32
-// CHECK: ucvtf {{s[0-9]+}}, {{[ws][0-9]+}}
return vcvts_f32_u32(a);
}
+// CHECK-LABEL: define double @test_vcvtd_f64_u64(i64 %a) #0 {
+// CHECK: [[TMP0:%.*]] = uitofp i64 %a to double
+// CHECK: ret double [[TMP0]]
float64_t test_vcvtd_f64_u64(uint64_t a) {
-// CHECK-LABEL: test_vcvtd_f64_u64
-// CHECK: ucvtf {{d[0-9]+}}, {{[xd][0-9]+}}
return vcvtd_f64_u64(a);
}
+// CHECK-LABEL: define float @test_vrecpes_f32(float %a) #0 {
+// CHECK: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a) #4
+// CHECK: ret float [[VRECPES_F32_I]]
float32_t test_vrecpes_f32(float32_t a) {
-// CHECK-LABEL: test_vrecpes_f32
-// CHECK: frecpe {{s[0-9]+}}, {{s[0-9]+}}
return vrecpes_f32(a);
}
+// CHECK-LABEL: define double @test_vrecped_f64(double %a) #0 {
+// CHECK: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a) #4
+// CHECK: ret double [[VRECPED_F64_I]]
float64_t test_vrecped_f64(float64_t a) {
-// CHECK-LABEL: test_vrecped_f64
-// CHECK: frecpe {{d[0-9]+}}, {{d[0-9]+}}
return vrecped_f64(a);
}
+// CHECK-LABEL: define float @test_vrecpxs_f32(float %a) #0 {
+// CHECK: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a) #4
+// CHECK: ret float [[VRECPXS_F32_I]]
float32_t test_vrecpxs_f32(float32_t a) {
-// CHECK-LABEL: test_vrecpxs_f32
-// CHECK: frecpx {{s[0-9]+}}, {{s[0-9]+}}
return vrecpxs_f32(a);
}
+// CHECK-LABEL: define double @test_vrecpxd_f64(double %a) #0 {
+// CHECK: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a) #4
+// CHECK: ret double [[VRECPXD_F64_I]]
float64_t test_vrecpxd_f64(float64_t a) {
-// CHECK-LABEL: test_vrecpxd_f64
-// CHECK: frecpx {{d[0-9]+}}, {{d[0-9]+}}
return vrecpxd_f64(a);
}
+// CHECK-LABEL: define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> [[VRSQRTE_V_I]]) #4
+// CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
-// CHECK-LABEL: test_vrsqrte_u32
-// CHECK: ursqrte {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
return vrsqrte_u32(a);
}
+// CHECK-LABEL: define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// CHECK: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> [[VRSQRTEQ_V_I]]) #4
+// CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
-// CHECK-LABEL: test_vrsqrteq_u32
-// CHECK: ursqrte {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
return vrsqrteq_u32(a);
}
+// CHECK-LABEL: define float @test_vrsqrtes_f32(float %a) #0 {
+// CHECK: [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a) #4
+// CHECK: ret float [[VRSQRTES_F32_I]]
float32_t test_vrsqrtes_f32(float32_t a) {
-// CHECK: vrsqrtes_f32
-// CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}}
return vrsqrtes_f32(a);
}
+// CHECK-LABEL: define double @test_vrsqrted_f64(double %a) #0 {
+// CHECK: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a) #4
+// CHECK: ret double [[VRSQRTED_F64_I]]
float64_t test_vrsqrted_f64(float64_t a) {
-// CHECK: vrsqrted_f64
-// CHECK: frsqrte {{d[0-9]+}}, {{d[0-9]+}}
return vrsqrted_f64(a);
}
+// CHECK-LABEL: define <16 x i8> @test_vld1q_u8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
+// CHECK: ret <16 x i8> [[TMP1]]
uint8x16_t test_vld1q_u8(uint8_t const *a) {
- // CHECK-LABEL: test_vld1q_u8
return vld1q_u8(a);
- // CHECK: {{ld1 { v[0-9]+.16b }|ldr q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <8 x i16> @test_vld1q_u16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
+// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
+// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vld1q_u16(uint16_t const *a) {
- // CHECK-LABEL: test_vld1q_u16
return vld1q_u16(a);
- // CHECK: {{ld1 { v[0-9]+.8h }|ldr q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <4 x i32> @test_vld1q_u32(i32* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
+// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
+// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vld1q_u32(uint32_t const *a) {
- // CHECK-LABEL: test_vld1q_u32
return vld1q_u32(a);
- // CHECK: {{ld1 { v[0-9]+.4s }|ldr q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <2 x i64> @test_vld1q_u64(i64* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
+// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
+// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vld1q_u64(uint64_t const *a) {
- // CHECK-LABEL: test_vld1q_u64
return vld1q_u64(a);
- // CHECK: {{ld1 { v[0-9]+.2d }|ldr q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <16 x i8> @test_vld1q_s8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
+// CHECK: ret <16 x i8> [[TMP1]]
int8x16_t test_vld1q_s8(int8_t const *a) {
- // CHECK-LABEL: test_vld1q_s8
return vld1q_s8(a);
- // CHECK: {{ld1 { v[0-9]+.16b }|ldr q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <8 x i16> @test_vld1q_s16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
+// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
+// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vld1q_s16(int16_t const *a) {
- // CHECK-LABEL: test_vld1q_s16
return vld1q_s16(a);
- // CHECK: {{ld1 { v[0-9]+.8h }|ldr q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <4 x i32> @test_vld1q_s32(i32* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
+// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
+// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vld1q_s32(int32_t const *a) {
- // CHECK-LABEL: test_vld1q_s32
return vld1q_s32(a);
- // CHECK: {{ld1 { v[0-9]+.4s }|ldr q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <2 x i64> @test_vld1q_s64(i64* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
+// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
+// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vld1q_s64(int64_t const *a) {
- // CHECK-LABEL: test_vld1q_s64
return vld1q_s64(a);
- // CHECK: {{ld1 { v[0-9]+.2d }|ldr q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <8 x half> @test_vld1q_f16(half* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
+// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>
+// CHECK: ret <8 x half> [[TMP3]]
float16x8_t test_vld1q_f16(float16_t const *a) {
- // CHECK-LABEL: test_vld1q_f16
return vld1q_f16(a);
- // CHECK: {{ld1 { v[0-9]+.8h }|ldr q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <4 x float> @test_vld1q_f32(float* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
+// CHECK: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]]
+// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vld1q_f32(float32_t const *a) {
- // CHECK-LABEL: test_vld1q_f32
return vld1q_f32(a);
- // CHECK: {{ld1 { v[0-9]+.4s }|ldr q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <2 x double> @test_vld1q_f64(double* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
+// CHECK: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]]
+// CHECK: ret <2 x double> [[TMP2]]
float64x2_t test_vld1q_f64(float64_t const *a) {
- // CHECK-LABEL: test_vld1q_f64
return vld1q_f64(a);
- // CHECK: {{ld1 { v[0-9]+.2d }|ldr q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <16 x i8> @test_vld1q_p8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
+// CHECK: ret <16 x i8> [[TMP1]]
poly8x16_t test_vld1q_p8(poly8_t const *a) {
- // CHECK-LABEL: test_vld1q_p8
return vld1q_p8(a);
- // CHECK: {{ld1 { v[0-9]+.16b }|ldr q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <8 x i16> @test_vld1q_p16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
+// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
+// CHECK: ret <8 x i16> [[TMP2]]
poly16x8_t test_vld1q_p16(poly16_t const *a) {
- // CHECK-LABEL: test_vld1q_p16
return vld1q_p16(a);
- // CHECK: {{ld1 { v[0-9]+.8h }|ldr q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <8 x i8> @test_vld1_u8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
+// CHECK: ret <8 x i8> [[TMP1]]
uint8x8_t test_vld1_u8(uint8_t const *a) {
- // CHECK-LABEL: test_vld1_u8
return vld1_u8(a);
- // CHECK: {{ld1 { v[0-9]+.8b }|ldr d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <4 x i16> @test_vld1_u16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
+// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
+// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vld1_u16(uint16_t const *a) {
- // CHECK-LABEL: test_vld1_u16
return vld1_u16(a);
- // CHECK: {{ld1 { v[0-9]+.4h }|ldr d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <2 x i32> @test_vld1_u32(i32* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
+// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
+// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vld1_u32(uint32_t const *a) {
- // CHECK-LABEL: test_vld1_u32
return vld1_u32(a);
- // CHECK: {{ld1 { v[0-9]+.2s }|ldr d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <1 x i64> @test_vld1_u64(i64* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
+// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
+// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vld1_u64(uint64_t const *a) {
- // CHECK-LABEL: test_vld1_u64
return vld1_u64(a);
- // CHECK: {{ld1 { v[0-9]+.1d }|ldr d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <8 x i8> @test_vld1_s8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
+// CHECK: ret <8 x i8> [[TMP1]]
int8x8_t test_vld1_s8(int8_t const *a) {
- // CHECK-LABEL: test_vld1_s8
return vld1_s8(a);
- // CHECK: {{ld1 { v[0-9]+.8b }|ldr d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <4 x i16> @test_vld1_s16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
+// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
+// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vld1_s16(int16_t const *a) {
- // CHECK-LABEL: test_vld1_s16
return vld1_s16(a);
- // CHECK: {{ld1 { v[0-9]+.4h }|ldr d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <2 x i32> @test_vld1_s32(i32* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
+// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
+// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vld1_s32(int32_t const *a) {
- // CHECK-LABEL: test_vld1_s32
return vld1_s32(a);
- // CHECK: {{ld1 { v[0-9]+.2s }|ldr d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <1 x i64> @test_vld1_s64(i64* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
+// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
+// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vld1_s64(int64_t const *a) {
- // CHECK-LABEL: test_vld1_s64
return vld1_s64(a);
- // CHECK: {{ld1 { v[0-9]+.1d }|ldr d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <4 x half> @test_vld1_f16(half* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
+// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
+// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
+// CHECK: ret <4 x half> [[TMP3]]
float16x4_t test_vld1_f16(float16_t const *a) {
- // CHECK-LABEL: test_vld1_f16
return vld1_f16(a);
- // CHECK: {{ld1 { v[0-9]+.4h }|ldr d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <2 x float> @test_vld1_f32(float* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
+// CHECK: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]]
+// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vld1_f32(float32_t const *a) {
- // CHECK-LABEL: test_vld1_f32
return vld1_f32(a);
- // CHECK: {{ld1 { v[0-9]+.2s }|ldr d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <1 x double> @test_vld1_f64(double* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
+// CHECK: [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]]
+// CHECK: ret <1 x double> [[TMP2]]
float64x1_t test_vld1_f64(float64_t const *a) {
- // CHECK-LABEL: test_vld1_f64
return vld1_f64(a);
- // CHECK: {{ld1 { v[0-9]+.1d }|ldr d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <8 x i8> @test_vld1_p8(i8* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
+// CHECK: ret <8 x i8> [[TMP1]]
poly8x8_t test_vld1_p8(poly8_t const *a) {
- // CHECK-LABEL: test_vld1_p8
return vld1_p8(a);
- // CHECK: {{ld1 { v[0-9]+.8b }|ldr d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define <4 x i16> @test_vld1_p16(i16* %a) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
+// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
+// CHECK: ret <4 x i16> [[TMP2]]
poly16x4_t test_vld1_p16(poly16_t const *a) {
- // CHECK-LABEL: test_vld1_p16
return vld1_p16(a);
- // CHECK: {{ld1 { v[0-9]+.4h }|ldr d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint8x16x2_t [[TMP5]]
uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
- // CHECK-LABEL: test_vld2q_u8
return vld2q_u8(a);
- // CHECK: ld2 {{{ ?v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint16x8x2_t [[TMP6]]
uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
- // CHECK-LABEL: test_vld2q_u16
return vld2q_u16(a);
- // CHECK: ld2 {{{ ?v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
+// CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint32x4x2_t [[TMP6]]
uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
- // CHECK-LABEL: test_vld2q_u32
return vld2q_u32(a);
- // CHECK: ld2 {{{ ?v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_u64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
+// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
+// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint64x2x2_t [[TMP6]]
uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
- // CHECK-LABEL: test_vld2q_u64
return vld2q_u64(a);
- // CHECK: ld2 {{{ ?v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int8x16x2_t [[TMP5]]
int8x16x2_t test_vld2q_s8(int8_t const *a) {
- // CHECK-LABEL: test_vld2q_s8
return vld2q_s8(a);
- // CHECK: ld2 {{{ ?v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int16x8x2_t [[TMP6]]
int16x8x2_t test_vld2q_s16(int16_t const *a) {
- // CHECK-LABEL: test_vld2q_s16
return vld2q_s16(a);
- // CHECK: ld2 {{{ ?v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
+// CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int32x4x2_t [[TMP6]]
int32x4x2_t test_vld2q_s32(int32_t const *a) {
- // CHECK-LABEL: test_vld2q_s32
return vld2q_s32(a);
- // CHECK: ld2 {{{ ?v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_s64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
+// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
+// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int64x2x2_t [[TMP6]]
int64x2x2_t test_vld2q_s64(int64_t const *a) {
- // CHECK-LABEL: test_vld2q_s64
return vld2q_s64(a);
- // CHECK: ld2 {{{ ?v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float16x8x2_t [[TMP6]]
float16x8x2_t test_vld2q_f16(float16_t const *a) {
- // CHECK-LABEL: test_vld2q_f16
return vld2q_f16(a);
- // CHECK: ld2 {{{ ?v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
+// CHECK: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
+// CHECK: store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float32x4x2_t [[TMP6]]
float32x4x2_t test_vld2q_f32(float32_t const *a) {
- // CHECK-LABEL: test_vld2q_f32
return vld2q_f32(a);
- // CHECK: ld2 {{{ ?v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_f64(double* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
+// CHECK: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0v2f64(<2 x double>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
+// CHECK: store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float64x2x2_t [[TMP6]]
float64x2x2_t test_vld2q_f64(float64_t const *a) {
- // CHECK-LABEL: test_vld2q_f64
return vld2q_f64(a);
- // CHECK: ld2 {{{ ?v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly8x16x2_t [[TMP5]]
poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
- // CHECK-LABEL: test_vld2q_p8
return vld2q_p8(a);
- // CHECK: ld2 {{{ ?v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly16x8x2_t [[TMP6]]
poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
- // CHECK-LABEL: test_vld2q_p16
return vld2q_p16(a);
- // CHECK: ld2 {{{ ?v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint8x8x2_t [[TMP5]]
uint8x8x2_t test_vld2_u8(uint8_t const *a) {
- // CHECK-LABEL: test_vld2_u8
return vld2_u8(a);
- // CHECK: ld2 {{{ ?v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x2_t [[TMP6]]
uint16x4x2_t test_vld2_u16(uint16_t const *a) {
- // CHECK-LABEL: test_vld2_u16
return vld2_u16(a);
- // CHECK: ld2 {{{ ?v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
+// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x2_t [[TMP6]]
uint32x2x2_t test_vld2_u32(uint32_t const *a) {
- // CHECK-LABEL: test_vld2_u32
return vld2_u32(a);
- // CHECK: ld2 {{{ ?v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_u64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
+// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint64x1x2_t [[TMP6]]
uint64x1x2_t test_vld2_u64(uint64_t const *a) {
- // CHECK-LABEL: test_vld2_u64
return vld2_u64(a);
- // CHECK: {{ld1|ld2}} {{{ ?v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int8x8x2_t [[TMP5]]
int8x8x2_t test_vld2_s8(int8_t const *a) {
- // CHECK-LABEL: test_vld2_s8
return vld2_s8(a);
- // CHECK: ld2 {{{ ?v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x2_t [[TMP6]]
int16x4x2_t test_vld2_s16(int16_t const *a) {
- // CHECK-LABEL: test_vld2_s16
return vld2_s16(a);
- // CHECK: ld2 {{{ ?v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
+// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x2_t [[TMP6]]
int32x2x2_t test_vld2_s32(int32_t const *a) {
- // CHECK-LABEL: test_vld2_s32
return vld2_s32(a);
- // CHECK: ld2 {{{ ?v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_s64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
+// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int64x1x2_t [[TMP6]]
int64x1x2_t test_vld2_s64(int64_t const *a) {
- // CHECK-LABEL: test_vld2_s64
return vld2_s64(a);
- // CHECK: {{ld1|ld2}} {{{ ?v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float16x4x2_t [[TMP6]]
float16x4x2_t test_vld2_f16(float16_t const *a) {
- // CHECK-LABEL: test_vld2_f16
return vld2_f16(a);
- // CHECK: ld2 {{{ ?v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
+// CHECK: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0v2f32(<2 x float>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
+// CHECK: store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x2_t [[TMP6]]
float32x2x2_t test_vld2_f32(float32_t const *a) {
- // CHECK-LABEL: test_vld2_f32
return vld2_f32(a);
- // CHECK: ld2 {{{ ?v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_f64(double* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
+// CHECK: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0v1f64(<1 x double>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
+// CHECK: store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float64x1x2_t [[TMP6]]
float64x1x2_t test_vld2_f64(float64_t const *a) {
- // CHECK-LABEL: test_vld2_f64
return vld2_f64(a);
- // CHECK: {{ld1|ld2}} {{{ ?v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly8x8x2_t [[TMP5]]
poly8x8x2_t test_vld2_p8(poly8_t const *a) {
- // CHECK-LABEL: test_vld2_p8
return vld2_p8(a);
- // CHECK: ld2 {{{ ?v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x2_t [[TMP6]]
poly16x4x2_t test_vld2_p16(poly16_t const *a) {
- // CHECK-LABEL: test_vld2_p16
return vld2_p16(a);
- // CHECK: ld2 {{{ ?v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint8x16x3_t [[TMP5]]
uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
- // CHECK-LABEL: test_vld3q_u8
return vld3q_u8(a);
- // CHECK: ld3 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint16x8x3_t [[TMP6]]
uint16x8x3_t test_vld3q_u16(uint16_t const *a) {
- // CHECK-LABEL: test_vld3q_u16
return vld3q_u16(a);
- // CHECK: ld3 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
+// CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint32x4x3_t [[TMP6]]
uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
- // CHECK-LABEL: test_vld3q_u32
return vld3q_u32(a);
- // CHECK: ld3 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_u64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
+// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
+// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint64x2x3_t [[TMP6]]
uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
- // CHECK-LABEL: test_vld3q_u64
return vld3q_u64(a);
- // CHECK: ld3 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int8x16x3_t [[TMP5]]
int8x16x3_t test_vld3q_s8(int8_t const *a) {
- // CHECK-LABEL: test_vld3q_s8
return vld3q_s8(a);
- // CHECK: ld3 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int16x8x3_t [[TMP6]]
int16x8x3_t test_vld3q_s16(int16_t const *a) {
- // CHECK-LABEL: test_vld3q_s16
return vld3q_s16(a);
- // CHECK: ld3 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
+// CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int32x4x3_t [[TMP6]]
int32x4x3_t test_vld3q_s32(int32_t const *a) {
- // CHECK-LABEL: test_vld3q_s32
return vld3q_s32(a);
- // CHECK: ld3 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_s64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
+// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
+// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int64x2x3_t [[TMP6]]
int64x2x3_t test_vld3q_s64(int64_t const *a) {
- // CHECK-LABEL: test_vld3q_s64
return vld3q_s64(a);
- // CHECK: ld3 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float16x8x3_t [[TMP6]]
float16x8x3_t test_vld3q_f16(float16_t const *a) {
- // CHECK-LABEL: test_vld3q_f16
return vld3q_f16(a);
- // CHECK: ld3 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
+// CHECK: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0v4f32(<4 x float>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
+// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float32x4x3_t [[TMP6]]
float32x4x3_t test_vld3q_f32(float32_t const *a) {
- // CHECK-LABEL: test_vld3q_f32
return vld3q_f32(a);
- // CHECK: ld3 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_f64(double* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
+// CHECK: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0v2f64(<2 x double>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
+// CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float64x2x3_t [[TMP6]]
float64x2x3_t test_vld3q_f64(float64_t const *a) {
- // CHECK-LABEL: test_vld3q_f64
return vld3q_f64(a);
- // CHECK: ld3 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly8x16x3_t [[TMP5]]
poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
- // CHECK-LABEL: test_vld3q_p8
return vld3q_p8(a);
- // CHECK: ld3 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly16x8x3_t [[TMP6]]
poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
- // CHECK-LABEL: test_vld3q_p16
return vld3q_p16(a);
- // CHECK: ld3 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint8x8x3_t [[TMP5]]
uint8x8x3_t test_vld3_u8(uint8_t const *a) {
- // CHECK-LABEL: test_vld3_u8
return vld3_u8(a);
- // CHECK: ld3 {{{ ?v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x3_t [[TMP6]]
uint16x4x3_t test_vld3_u16(uint16_t const *a) {
- // CHECK-LABEL: test_vld3_u16
return vld3_u16(a);
- // CHECK: ld3 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
+// CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x3_t [[TMP6]]
uint32x2x3_t test_vld3_u32(uint32_t const *a) {
- // CHECK-LABEL: test_vld3_u32
return vld3_u32(a);
- // CHECK: ld3 {{{ ?v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_u64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
+// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint64x1x3_t [[TMP6]]
uint64x1x3_t test_vld3_u64(uint64_t const *a) {
- // CHECK-LABEL: test_vld3_u64
return vld3_u64(a);
- // CHECK: {{ld1|ld3}} {{{ ?v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int8x8x3_t [[TMP5]]
int8x8x3_t test_vld3_s8(int8_t const *a) {
- // CHECK-LABEL: test_vld3_s8
return vld3_s8(a);
- // CHECK: ld3 {{{ ?v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x3_t [[TMP6]]
int16x4x3_t test_vld3_s16(int16_t const *a) {
- // CHECK-LABEL: test_vld3_s16
return vld3_s16(a);
- // CHECK: ld3 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
+// CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x3_t [[TMP6]]
int32x2x3_t test_vld3_s32(int32_t const *a) {
- // CHECK-LABEL: test_vld3_s32
return vld3_s32(a);
- // CHECK: ld3 {{{ ?v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_s64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
+// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int64x1x3_t [[TMP6]]
int64x1x3_t test_vld3_s64(int64_t const *a) {
- // CHECK-LABEL: test_vld3_s64
return vld3_s64(a);
- // CHECK: {{ld1|ld3}} {{{ ?v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float16x4x3_t [[TMP6]]
float16x4x3_t test_vld3_f16(float16_t const *a) {
- // CHECK-LABEL: test_vld3_f16
return vld3_f16(a);
- // CHECK: ld3 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
+// CHECK: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0v2f32(<2 x float>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
+// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x3_t [[TMP6]]
float32x2x3_t test_vld3_f32(float32_t const *a) {
- // CHECK-LABEL: test_vld3_f32
return vld3_f32(a);
- // CHECK: ld3 {{{ ?v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_f64(double* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
+// CHECK: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0v1f64(<1 x double>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
+// CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float64x1x3_t [[TMP6]]
float64x1x3_t test_vld3_f64(float64_t const *a) {
- // CHECK-LABEL: test_vld3_f64
return vld3_f64(a);
- // CHECK: {{ld1|ld3}} {{{ ?v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly8x8x3_t [[TMP5]]
poly8x8x3_t test_vld3_p8(poly8_t const *a) {
- // CHECK-LABEL: test_vld3_p8
return vld3_p8(a);
- // CHECK: ld3 {{{ ?v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x3_t [[TMP6]]
poly16x4x3_t test_vld3_p16(poly16_t const *a) {
- // CHECK-LABEL: test_vld3_p16
return vld3_p16(a);
- // CHECK: ld3 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint8x16x4_t [[TMP5]]
uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
- // CHECK-LABEL: test_vld4q_u8
return vld4q_u8(a);
- // CHECK: ld4 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint16x8x4_t [[TMP6]]
uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
- // CHECK-LABEL: test_vld4q_u16
return vld4q_u16(a);
- // CHECK: ld4 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
+// CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint32x4x4_t [[TMP6]]
uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
- // CHECK-LABEL: test_vld4q_u32
return vld4q_u32(a);
- // CHECK: ld4 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_u64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
+// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
+// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint64x2x4_t [[TMP6]]
uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
- // CHECK-LABEL: test_vld4q_u64
return vld4q_u64(a);
- // CHECK: ld4 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int8x16x4_t [[TMP5]]
int8x16x4_t test_vld4q_s8(int8_t const *a) {
- // CHECK-LABEL: test_vld4q_s8
return vld4q_s8(a);
- // CHECK: ld4 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int16x8x4_t [[TMP6]]
int16x8x4_t test_vld4q_s16(int16_t const *a) {
- // CHECK-LABEL: test_vld4q_s16
return vld4q_s16(a);
- // CHECK: ld4 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
+// CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int32x4x4_t [[TMP6]]
int32x4x4_t test_vld4q_s32(int32_t const *a) {
- // CHECK-LABEL: test_vld4q_s32
return vld4q_s32(a);
- // CHECK: ld4 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_s64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
+// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
+// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int64x2x4_t [[TMP6]]
int64x2x4_t test_vld4q_s64(int64_t const *a) {
- // CHECK-LABEL: test_vld4q_s64
return vld4q_s64(a);
- // CHECK: ld4 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float16x8x4_t [[TMP6]]
float16x8x4_t test_vld4q_f16(float16_t const *a) {
- // CHECK-LABEL: test_vld4q_f16
return vld4q_f16(a);
- // CHECK: ld4 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
+// CHECK: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0v4f32(<4 x float>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
+// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float32x4x4_t [[TMP6]]
float32x4x4_t test_vld4q_f32(float32_t const *a) {
- // CHECK-LABEL: test_vld4q_f32
return vld4q_f32(a);
- // CHECK: ld4 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_f64(double* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
+// CHECK: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0v2f64(<2 x double>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
+// CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float64x2x4_t [[TMP6]]
float64x2x4_t test_vld4q_f64(float64_t const *a) {
- // CHECK-LABEL: test_vld4q_f64
return vld4q_f64(a);
- // CHECK: ld4 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly8x16x4_t [[TMP5]]
poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
- // CHECK-LABEL: test_vld4q_p8
return vld4q_p8(a);
- // CHECK: ld4 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly16x8x4_t [[TMP6]]
poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
- // CHECK-LABEL: test_vld4q_p16
return vld4q_p16(a);
- // CHECK: ld4 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_u8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint8x8x4_t [[TMP5]]
uint8x8x4_t test_vld4_u8(uint8_t const *a) {
- // CHECK-LABEL: test_vld4_u8
return vld4_u8(a);
- // CHECK: ld4 {{{ ?v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_u16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x4_t [[TMP6]]
uint16x4x4_t test_vld4_u16(uint16_t const *a) {
- // CHECK-LABEL: test_vld4_u16
return vld4_u16(a);
- // CHECK: ld4 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_u32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
+// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x4_t [[TMP6]]
uint32x2x4_t test_vld4_u32(uint32_t const *a) {
- // CHECK-LABEL: test_vld4_u32
return vld4_u32(a);
- // CHECK: ld4 {{{ ?v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_u64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
+// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint64x1x4_t [[TMP6]]
uint64x1x4_t test_vld4_u64(uint64_t const *a) {
- // CHECK-LABEL: test_vld4_u64
return vld4_u64(a);
- // CHECK: {{ld1|ld4}} {{{ ?v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_s8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int8x8x4_t [[TMP5]]
int8x8x4_t test_vld4_s8(int8_t const *a) {
- // CHECK-LABEL: test_vld4_s8
return vld4_s8(a);
- // CHECK: ld4 {{{ ?v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_s16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x4_t [[TMP6]]
int16x4x4_t test_vld4_s16(int16_t const *a) {
- // CHECK-LABEL: test_vld4_s16
return vld4_s16(a);
- // CHECK: ld4 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_s32(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
+// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x4_t [[TMP6]]
int32x2x4_t test_vld4_s32(int32_t const *a) {
- // CHECK-LABEL: test_vld4_s32
return vld4_s32(a);
- // CHECK: ld4 {{{ ?v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_s64(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
+// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int64x1x4_t [[TMP6]]
int64x1x4_t test_vld4_s64(int64_t const *a) {
- // CHECK-LABEL: test_vld4_s64
return vld4_s64(a);
- // CHECK: {{ld1|ld4}} {{{ ?v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_f16(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float16x4x4_t [[TMP6]]
float16x4x4_t test_vld4_f16(float16_t const *a) {
- // CHECK-LABEL: test_vld4_f16
return vld4_f16(a);
- // CHECK: ld4 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_f32(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
+// CHECK: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0v2f32(<2 x float>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
+// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x4_t [[TMP6]]
float32x2x4_t test_vld4_f32(float32_t const *a) {
- // CHECK-LABEL: test_vld4_f32
return vld4_f32(a);
- // CHECK: ld4 {{{ ?v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_f64(double* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
+// CHECK: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0v1f64(<1 x double>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
+// CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float64x1x4_t [[TMP6]]
float64x1x4_t test_vld4_f64(float64_t const *a) {
- // CHECK-LABEL: test_vld4_f64
return vld4_f64(a);
- // CHECK: {{ld1|ld4}} {{{ ?v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_p8(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly8x8x4_t [[TMP5]]
poly8x8x4_t test_vld4_p8(poly8_t const *a) {
- // CHECK-LABEL: test_vld4_p8
return vld4_p8(a);
- // CHECK: ld4 {{{ ?v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_p16(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
+// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x4_t [[TMP6]]
poly16x4x4_t test_vld4_p16(poly16_t const *a) {
- // CHECK-LABEL: test_vld4_p16
return vld4_p16(a);
- // CHECK: ld4 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1q_u8(i8* %a, <16 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
+// CHECK: ret void
void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
- // CHECK-LABEL: test_vst1q_u8
vst1q_u8(a, b);
- // CHECK: {{st1 { v[0-9]+.16b }|str q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1q_u16(i16* %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
+// CHECK: ret void
void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
- // CHECK-LABEL: test_vst1q_u16
vst1q_u16(a, b);
- // CHECK: {{st1 { v[0-9]+.8h }|str q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1q_u32(i32* %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
+// CHECK: ret void
void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
- // CHECK-LABEL: test_vst1q_u32
vst1q_u32(a, b);
- // CHECK: {{st1 { v[0-9]+.4s }|str q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1q_u64(i64* %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
+// CHECK: ret void
void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
- // CHECK-LABEL: test_vst1q_u64
vst1q_u64(a, b);
- // CHECK: {{st1 { v[0-9]+.2d }|str q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1q_s8(i8* %a, <16 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
+// CHECK: ret void
void test_vst1q_s8(int8_t *a, int8x16_t b) {
- // CHECK-LABEL: test_vst1q_s8
vst1q_s8(a, b);
- // CHECK: {{st1 { v[0-9]+.16b }|str q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1q_s16(i16* %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
+// CHECK: ret void
void test_vst1q_s16(int16_t *a, int16x8_t b) {
- // CHECK-LABEL: test_vst1q_s16
vst1q_s16(a, b);
- // CHECK: {{st1 { v[0-9]+.8h }|str q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1q_s32(i32* %a, <4 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
+// CHECK: ret void
void test_vst1q_s32(int32_t *a, int32x4_t b) {
- // CHECK-LABEL: test_vst1q_s32
vst1q_s32(a, b);
- // CHECK: {{st1 { v[0-9]+.4s }|str q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1q_s64(i64* %a, <2 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
+// CHECK: ret void
void test_vst1q_s64(int64_t *a, int64x2_t b) {
- // CHECK-LABEL: test_vst1q_s64
vst1q_s64(a, b);
- // CHECK: {{st1 { v[0-9]+.2d }|str q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1q_f16(half* %a, <8 x half> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
+// CHECK: ret void
void test_vst1q_f16(float16_t *a, float16x8_t b) {
- // CHECK-LABEL: test_vst1q_f16
vst1q_f16(a, b);
- // CHECK: {{st1 { v[0-9]+.8h }|str q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1q_f32(float* %a, <4 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK: store <4 x float> [[TMP3]], <4 x float>* [[TMP2]]
+// CHECK: ret void
void test_vst1q_f32(float32_t *a, float32x4_t b) {
- // CHECK-LABEL: test_vst1q_f32
vst1q_f32(a, b);
- // CHECK: {{st1 { v[0-9]+.4s }|str q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1q_f64(double* %a, <2 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK: store <2 x double> [[TMP3]], <2 x double>* [[TMP2]]
+// CHECK: ret void
void test_vst1q_f64(float64_t *a, float64x2_t b) {
- // CHECK-LABEL: test_vst1q_f64
vst1q_f64(a, b);
- // CHECK: {{st1 { v[0-9]+.2d }|str q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1q_p8(i8* %a, <16 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
+// CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
+// CHECK: ret void
void test_vst1q_p8(poly8_t *a, poly8x16_t b) {
- // CHECK-LABEL: test_vst1q_p8
vst1q_p8(a, b);
- // CHECK: {{st1 { v[0-9]+.16b }|str q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1q_p16(i16* %a, <8 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
+// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
+// CHECK: ret void
void test_vst1q_p16(poly16_t *a, poly16x8_t b) {
- // CHECK-LABEL: test_vst1q_p16
vst1q_p16(a, b);
- // CHECK: {{st1 { v[0-9]+.8h }|str q[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1_u8(i8* %a, <8 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
+// CHECK: ret void
void test_vst1_u8(uint8_t *a, uint8x8_t b) {
- // CHECK-LABEL: test_vst1_u8
vst1_u8(a, b);
- // CHECK: {{st1 { v[0-9]+.8b }|str d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1_u16(i16* %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
+// CHECK: ret void
void test_vst1_u16(uint16_t *a, uint16x4_t b) {
- // CHECK-LABEL: test_vst1_u16
vst1_u16(a, b);
- // CHECK: {{st1 { v[0-9]+.4h }|str d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1_u32(i32* %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
+// CHECK: ret void
void test_vst1_u32(uint32_t *a, uint32x2_t b) {
- // CHECK-LABEL: test_vst1_u32
vst1_u32(a, b);
- // CHECK: {{st1 { v[0-9]+.2s }|str d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1_u64(i64* %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
+// CHECK: ret void
void test_vst1_u64(uint64_t *a, uint64x1_t b) {
- // CHECK-LABEL: test_vst1_u64
vst1_u64(a, b);
- // CHECK: {{st1 { v[0-9]+.1d }|str d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1_s8(i8* %a, <8 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
+// CHECK: ret void
void test_vst1_s8(int8_t *a, int8x8_t b) {
- // CHECK-LABEL: test_vst1_s8
vst1_s8(a, b);
- // CHECK: {{st1 { v[0-9]+.8b }|str d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1_s16(i16* %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
+// CHECK: ret void
void test_vst1_s16(int16_t *a, int16x4_t b) {
- // CHECK-LABEL: test_vst1_s16
vst1_s16(a, b);
- // CHECK: {{st1 { v[0-9]+.4h }|str d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1_s32(i32* %a, <2 x i32> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
+// CHECK: ret void
void test_vst1_s32(int32_t *a, int32x2_t b) {
- // CHECK-LABEL: test_vst1_s32
vst1_s32(a, b);
- // CHECK: {{st1 { v[0-9]+.2s }|str d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1_s64(i64* %a, <1 x i64> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
+// CHECK: ret void
void test_vst1_s64(int64_t *a, int64x1_t b) {
- // CHECK-LABEL: test_vst1_s64
vst1_s64(a, b);
- // CHECK: {{st1 { v[0-9]+.1d }|str d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1_f16(half* %a, <4 x half> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
+// CHECK: ret void
void test_vst1_f16(float16_t *a, float16x4_t b) {
- // CHECK-LABEL: test_vst1_f16
vst1_f16(a, b);
- // CHECK: {{st1 { v[0-9]+.4h }|str d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1_f32(float* %a, <2 x float> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK: store <2 x float> [[TMP3]], <2 x float>* [[TMP2]]
+// CHECK: ret void
void test_vst1_f32(float32_t *a, float32x2_t b) {
- // CHECK-LABEL: test_vst1_f32
vst1_f32(a, b);
- // CHECK: {{st1 { v[0-9]+.2s }|str d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1_f64(double* %a, <1 x double> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK: store <1 x double> [[TMP3]], <1 x double>* [[TMP2]]
+// CHECK: ret void
void test_vst1_f64(float64_t *a, float64x1_t b) {
- // CHECK-LABEL: test_vst1_f64
vst1_f64(a, b);
- // CHECK: {{st1 { v[0-9]+.1d }|str d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1_p8(i8* %a, <8 x i8> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
+// CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
+// CHECK: ret void
void test_vst1_p8(poly8_t *a, poly8x8_t b) {
- // CHECK-LABEL: test_vst1_p8
vst1_p8(a, b);
- // CHECK: {{st1 { v[0-9]+.8b }|str d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst1_p16(i16* %a, <4 x i16> %b) #0 {
+// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
+// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
+// CHECK: ret void
void test_vst1_p16(poly16_t *a, poly16x4_t b) {
- // CHECK-LABEL: test_vst1_p16
vst1_p16(a, b);
- // CHECK: {{st1 { v[0-9]+.4h }|str d[0-9]+}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2q_u8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
+// CHECK: ret void
void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
- // CHECK-LABEL: test_vst2q_u8
vst2q_u8(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2q_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) {
- // CHECK-LABEL: test_vst2q_u16
vst2q_u16(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2q_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
+// CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) {
- // CHECK-LABEL: test_vst2q_u32
vst2q_u32(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2q_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) {
- // CHECK-LABEL: test_vst2q_u64
vst2q_u64(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
+// CHECK: ret void
void test_vst2q_s8(int8_t *a, int8x16x2_t b) {
- // CHECK-LABEL: test_vst2q_s8
vst2q_s8(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2q_s16(int16_t *a, int16x8x2_t b) {
- // CHECK-LABEL: test_vst2q_s16
vst2q_s16(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
+// CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2q_s32(int32_t *a, int32x4x2_t b) {
- // CHECK-LABEL: test_vst2q_s32
vst2q_s32(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
- // CHECK-LABEL: test_vst2q_s64
vst2q_s64(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2q_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
- // CHECK-LABEL: test_vst2q_f16
vst2q_f16(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK: call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> [[TMP7]], <4 x float> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2q_f32(float32_t *a, float32x4x2_t b) {
- // CHECK-LABEL: test_vst2q_f32
vst2q_f32(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// CHECK: call void @llvm.aarch64.neon.st2.v2f64.p0i8(<2 x double> [[TMP7]], <2 x double> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2q_f64(float64_t *a, float64x2x2_t b) {
- // CHECK-LABEL: test_vst2q_f64
vst2q_f64(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2q_p8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
+// CHECK: ret void
void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) {
- // CHECK-LABEL: test_vst2q_p8
vst2q_p8(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2q_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) {
- // CHECK-LABEL: test_vst2q_p16
vst2q_p16(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
+// CHECK: ret void
void test_vst2_u8(uint8_t *a, uint8x8x2_t b) {
- // CHECK-LABEL: test_vst2_u8
vst2_u8(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2_u16(uint16_t *a, uint16x4x2_t b) {
- // CHECK-LABEL: test_vst2_u16
vst2_u16(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2_u32(uint32_t *a, uint32x2x2_t b) {
- // CHECK-LABEL: test_vst2_u32
vst2_u32(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2_u64(uint64_t *a, uint64x1x2_t b) {
- // CHECK-LABEL: test_vst2_u64
vst2_u64(a, b);
- // CHECK: {{st1|st2}} {{{ ?v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
+// CHECK: ret void
void test_vst2_s8(int8_t *a, int8x8x2_t b) {
- // CHECK-LABEL: test_vst2_s8
vst2_s8(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2_s16(int16_t *a, int16x4x2_t b) {
- // CHECK-LABEL: test_vst2_s16
vst2_s16(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2_s32(int32_t *a, int32x2x2_t b) {
- // CHECK-LABEL: test_vst2_s32
vst2_s32(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2_s64(int64_t *a, int64x1x2_t b) {
- // CHECK-LABEL: test_vst2_s64
vst2_s64(a, b);
- // CHECK: {{st1|st2}} {{{ ?v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2_f16(float16_t *a, float16x4x2_t b) {
- // CHECK-LABEL: test_vst2_f16
vst2_f16(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK: call void @llvm.aarch64.neon.st2.v2f32.p0i8(<2 x float> [[TMP7]], <2 x float> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2_f32(float32_t *a, float32x2x2_t b) {
- // CHECK-LABEL: test_vst2_f32
vst2_f32(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
+// CHECK: call void @llvm.aarch64.neon.st2.v1f64.p0i8(<1 x double> [[TMP7]], <1 x double> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2_f64(float64_t *a, float64x1x2_t b) {
- // CHECK-LABEL: test_vst2_f64
vst2_f64(a, b);
- // CHECK: {{st1|st2}} {{{ ?v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
+// CHECK: ret void
void test_vst2_p8(poly8_t *a, poly8x8x2_t b) {
- // CHECK-LABEL: test_vst2_p8
vst2_p8(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst2_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
+// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
+// CHECK: ret void
void test_vst2_p16(poly16_t *a, poly16x4x2_t b) {
- // CHECK-LABEL: test_vst2_p16
vst2_p16(a, b);
- // CHECK: st2 {{{ ?v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3q_u8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16
+// CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
+// CHECK: ret void
void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) {
- // CHECK-LABEL: test_vst3q_u8
vst3q_u8(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3q_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) {
- // CHECK-LABEL: test_vst3q_u16
vst3q_u16(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3q_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
+// CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP1]]0, <4 x i32> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
- // CHECK-LABEL: test_vst3q_u32
vst3q_u32(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3q_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
+// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP1]]0, <2 x i64> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) {
- // CHECK-LABEL: test_vst3q_u64
vst3q_u64(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16
+// CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
+// CHECK: ret void
void test_vst3q_s8(int8_t *a, int8x16x3_t b) {
- // CHECK-LABEL: test_vst3q_s8
vst3q_s8(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3q_s16(int16_t *a, int16x8x3_t b) {
- // CHECK-LABEL: test_vst3q_s16
vst3q_s16(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
+// CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP1]]0, <4 x i32> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3q_s32(int32_t *a, int32x4x3_t b) {
- // CHECK-LABEL: test_vst3q_s32
vst3q_s32(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
+// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP1]]0, <2 x i64> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
- // CHECK-LABEL: test_vst3q_s64
vst3q_s64(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3q_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
- // CHECK-LABEL: test_vst3q_f16
vst3q_f16(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
+// CHECK: call void @llvm.aarch64.neon.st3.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP1]]0, <4 x float> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3q_f32(float32_t *a, float32x4x3_t b) {
- // CHECK-LABEL: test_vst3q_f32
vst3q_f32(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
+// CHECK: call void @llvm.aarch64.neon.st3.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP1]]0, <2 x double> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
- // CHECK-LABEL: test_vst3q_f64
vst3q_f64(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3q_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16
+// CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
+// CHECK: ret void
void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
- // CHECK-LABEL: test_vst3q_p8
vst3q_p8(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3q_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
- // CHECK-LABEL: test_vst3q_p16
vst3q_p16(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
+// CHECK: ret void
void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
- // CHECK-LABEL: test_vst3_u8
vst3_u8(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
- // CHECK-LABEL: test_vst3_u16
vst3_u16(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
+// CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP1]]0, <2 x i32> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
- // CHECK-LABEL: test_vst3_u32
vst3_u32(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
+// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP1]]0, <1 x i64> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
- // CHECK-LABEL: test_vst3_u64
vst3_u64(a, b);
- // CHECK: {{st1|st3}} {{{ ?v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
+// CHECK: ret void
void test_vst3_s8(int8_t *a, int8x8x3_t b) {
- // CHECK-LABEL: test_vst3_s8
vst3_s8(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3_s16(int16_t *a, int16x4x3_t b) {
- // CHECK-LABEL: test_vst3_s16
vst3_s16(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
+// CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP1]]0, <2 x i32> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3_s32(int32_t *a, int32x2x3_t b) {
- // CHECK-LABEL: test_vst3_s32
vst3_s32(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
+// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP1]]0, <1 x i64> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3_s64(int64_t *a, int64x1x3_t b) {
- // CHECK-LABEL: test_vst3_s64
vst3_s64(a, b);
- // CHECK: {{st1|st3}} {{{ ?v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3_f16(float16_t *a, float16x4x3_t b) {
- // CHECK-LABEL: test_vst3_f16
vst3_f16(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
+// CHECK: call void @llvm.aarch64.neon.st3.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP1]]0, <2 x float> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3_f32(float32_t *a, float32x2x3_t b) {
- // CHECK-LABEL: test_vst3_f32
vst3_f32(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
+// CHECK: call void @llvm.aarch64.neon.st3.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP1]]0, <1 x double> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3_f64(float64_t *a, float64x1x3_t b) {
- // CHECK-LABEL: test_vst3_f64
vst3_f64(a, b);
- // CHECK: {{st1|st3}} {{{ ?v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
+// CHECK: ret void
void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
- // CHECK-LABEL: test_vst3_p8
vst3_p8(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst3_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
+// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
+// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: ret void
void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
- // CHECK-LABEL: test_vst3_p16
vst3_p16(a, b);
- // CHECK: st3 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4q_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16
+// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
+// CHECK: ret void
void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
- // CHECK-LABEL: test_vst4q_u8
vst4q_u8(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4q_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
- // CHECK-LABEL: test_vst4q_u16
vst4q_u16(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4q_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <4 x i32>
+// CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP1]]1, <4 x i32> [[TMP1]]2, <4 x i32> [[TMP1]]3, <4 x i32> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
- // CHECK-LABEL: test_vst4q_u32
vst4q_u32(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4q_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x i64>
+// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP1]]1, <2 x i64> [[TMP1]]2, <2 x i64> [[TMP1]]3, <2 x i64> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
- // CHECK-LABEL: test_vst4q_u64
vst4q_u64(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16
+// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
+// CHECK: ret void
void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
- // CHECK-LABEL: test_vst4q_s8
vst4q_s8(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
- // CHECK-LABEL: test_vst4q_s16
vst4q_s16(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <4 x i32>
+// CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP1]]1, <4 x i32> [[TMP1]]2, <4 x i32> [[TMP1]]3, <4 x i32> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
- // CHECK-LABEL: test_vst4q_s32
vst4q_s32(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x i64>
+// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP1]]1, <2 x i64> [[TMP1]]2, <2 x i64> [[TMP1]]3, <2 x i64> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
- // CHECK-LABEL: test_vst4q_s64
vst4q_s64(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4q_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
- // CHECK-LABEL: test_vst4q_f16
vst4q_f16(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <4 x float>
+// CHECK: call void @llvm.aarch64.neon.st4.v4f32.p0i8(<4 x float> [[TMP1]]1, <4 x float> [[TMP1]]2, <4 x float> [[TMP1]]3, <4 x float> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
- // CHECK-LABEL: test_vst4q_f32
vst4q_f32(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x double>
+// CHECK: call void @llvm.aarch64.neon.st4.v2f64.p0i8(<2 x double> [[TMP1]]1, <2 x double> [[TMP1]]2, <2 x double> [[TMP1]]3, <2 x double> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
- // CHECK-LABEL: test_vst4q_f64
vst4q_f64(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4q_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16
+// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
+// CHECK: ret void
void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
- // CHECK-LABEL: test_vst4q_p8
vst4q_p8(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4q_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16
+// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
- // CHECK-LABEL: test_vst4q_p16
vst4q_p16(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8
+// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
+// CHECK: ret void
void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
- // CHECK-LABEL: test_vst4_u8
vst4_u8(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
- // CHECK-LABEL: test_vst4_u16
vst4_u16(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <2 x i32>
+// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP1]]1, <2 x i32> [[TMP1]]2, <2 x i32> [[TMP1]]3, <2 x i32> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
- // CHECK-LABEL: test_vst4_u32
vst4_u32(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <1 x i64>
+// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP1]]1, <1 x i64> [[TMP1]]2, <1 x i64> [[TMP1]]3, <1 x i64> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
- // CHECK-LABEL: test_vst4_u64
vst4_u64(a, b);
- // CHECK: {{st1|st4}} {{{ ?v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8
+// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
+// CHECK: ret void
void test_vst4_s8(int8_t *a, int8x8x4_t b) {
- // CHECK-LABEL: test_vst4_s8
vst4_s8(a, b);
-// CHECK: st4 {{{ ?v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4_s16(int16_t *a, int16x4x4_t b) {
- // CHECK-LABEL: test_vst4_s16
vst4_s16(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <2 x i32>
+// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP1]]1, <2 x i32> [[TMP1]]2, <2 x i32> [[TMP1]]3, <2 x i32> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4_s32(int32_t *a, int32x2x4_t b) {
- // CHECK-LABEL: test_vst4_s32
vst4_s32(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <1 x i64>
+// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP1]]1, <1 x i64> [[TMP1]]2, <1 x i64> [[TMP1]]3, <1 x i64> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4_s64(int64_t *a, int64x1x4_t b) {
- // CHECK-LABEL: test_vst4_s64
vst4_s64(a, b);
- // CHECK: {{st1|st4}} {{{ ?v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4_f16(float16_t *a, float16x4x4_t b) {
- // CHECK-LABEL: test_vst4_f16
vst4_f16(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <2 x float>
+// CHECK: call void @llvm.aarch64.neon.st4.v2f32.p0i8(<2 x float> [[TMP1]]1, <2 x float> [[TMP1]]2, <2 x float> [[TMP1]]3, <2 x float> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4_f32(float32_t *a, float32x2x4_t b) {
- // CHECK-LABEL: test_vst4_f32
vst4_f32(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <1 x double>
+// CHECK: call void @llvm.aarch64.neon.st4.v1f64.p0i8(<1 x double> [[TMP1]]1, <1 x double> [[TMP1]]2, <1 x double> [[TMP1]]3, <1 x double> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4_f64(float64_t *a, float64x1x4_t b) {
- // CHECK-LABEL: test_vst4_f64
vst4_f64(a, b);
- // CHECK: {{st1|st4}} {{{ ?v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8
+// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
+// CHECK: ret void
void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
- // CHECK-LABEL: test_vst4_p8
vst4_p8(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define void @test_vst4_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
+// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
+// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
+// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
+// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
+// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8
+// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: ret void
void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
- // CHECK-LABEL: test_vst4_p16
vst4_p16(a, b);
- // CHECK: st4 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld1q_u8_x2(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
+// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint8x16x2_t [[TMP4]]
uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) {
- // CHECK-LABEL: test_vld1q_u8_x2
return vld1q_u8_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld1q_u16_x2(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint16x8x2_t [[TMP6]]
uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) {
- // CHECK-LABEL: test_vld1q_u16_x2
return vld1q_u16_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld1q_u32_x2(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint32x4x2_t [[TMP6]]
uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) {
- // CHECK-LABEL: test_vld1q_u32_x2
return vld1q_u32_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld1q_u64_x2(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
+// CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint64x2x2_t [[TMP6]]
uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) {
- // CHECK-LABEL: test_vld1q_u64_x2
return vld1q_u64_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
+// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int8x16x2_t [[TMP4]]
int8x16x2_t test_vld1q_s8_x2(int8_t const *a) {
- // CHECK-LABEL: test_vld1q_s8_x2
return vld1q_s8_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int16x8x2_t [[TMP6]]
int16x8x2_t test_vld1q_s16_x2(int16_t const *a) {
- // CHECK-LABEL: test_vld1q_s16_x2
return vld1q_s16_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int32x4x2_t [[TMP6]]
int32x4x2_t test_vld1q_s32_x2(int32_t const *a) {
- // CHECK-LABEL: test_vld1q_s32_x2
return vld1q_s32_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
+// CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int64x2x2_t [[TMP6]]
int64x2x2_t test_vld1q_s64_x2(int64_t const *a) {
- // CHECK-LABEL: test_vld1q_s64_x2
return vld1q_s64_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float16x8x2_t @test_vld1q_f16_x2(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float16x8x2_t [[TMP6]]
float16x8x2_t test_vld1q_f16_x2(float16_t const *a) {
- // CHECK-LABEL: test_vld1q_f16_x2
return vld1q_f16_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
+// CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
+// CHECK: store { <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float32x4x2_t [[TMP6]]
float32x4x2_t test_vld1q_f32_x2(float32_t const *a) {
- // CHECK-LABEL: test_vld1q_f32_x2
return vld1q_f32_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
+// CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
+// CHECK: store { <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float64x2x2_t [[TMP6]]
float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
- // CHECK-LABEL: test_vld1q_f64_x2
return vld1q_f64_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld1q_p8_x2(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
+// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly8x16x2_t [[TMP4]]
poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) {
- // CHECK-LABEL: test_vld1q_p8_x2
return vld1q_p8_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld1q_p16_x2(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly16x8x2_t [[TMP6]]
poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) {
- // CHECK-LABEL: test_vld1q_p16_x2
return vld1q_p16_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld1q_p64_x2(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
+// CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly64x2x2_t [[TMP6]]
poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
- // CHECK-LABEL: test_vld1q_p64_x2
return vld1q_p64_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld1_u8_x2(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
+// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint8x8x2_t [[TMP4]]
uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) {
- // CHECK-LABEL: test_vld1_u8_x2
return vld1_u8_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld1_u16_x2(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x2_t [[TMP6]]
uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) {
- // CHECK-LABEL: test_vld1_u16_x2
return vld1_u16_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld1_u32_x2(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x2_t [[TMP6]]
uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) {
- // CHECK-LABEL: test_vld1_u32_x2
return vld1_u32_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld1_u64_x2(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint64x1x2_t [[TMP6]]
uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) {
- // CHECK-LABEL: test_vld1_u64_x2
return vld1_u64_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
+// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int8x8x2_t [[TMP4]]
int8x8x2_t test_vld1_s8_x2(int8_t const *a) {
- // CHECK-LABEL: test_vld1_s8_x2
return vld1_s8_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x2_t [[TMP6]]
int16x4x2_t test_vld1_s16_x2(int16_t const *a) {
- // CHECK-LABEL: test_vld1_s16_x2
return vld1_s16_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x2_t [[TMP6]]
int32x2x2_t test_vld1_s32_x2(int32_t const *a) {
- // CHECK-LABEL: test_vld1_s32_x2
return vld1_s32_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int64x1x2_t [[TMP6]]
int64x1x2_t test_vld1_s64_x2(int64_t const *a) {
- // CHECK-LABEL: test_vld1_s64_x2
return vld1_s64_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float16x4x2_t @test_vld1_f16_x2(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float16x4x2_t [[TMP6]]
float16x4x2_t test_vld1_f16_x2(float16_t const *a) {
- // CHECK-LABEL: test_vld1_f16_x2
return vld1_f16_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
+// CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
+// CHECK: store { <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float32x2x2_t [[TMP6]]
float32x2x2_t test_vld1_f32_x2(float32_t const *a) {
- // CHECK-LABEL: test_vld1_f32_x2
return vld1_f32_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
+// CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
+// CHECK: store { <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float64x1x2_t [[TMP6]]
float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
- // CHECK-LABEL: test_vld1_f64_x2
return vld1_f64_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld1_p8_x2(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
+// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly8x8x2_t [[TMP4]]
poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) {
- // CHECK-LABEL: test_vld1_p8_x2
return vld1_p8_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld1_p16_x2(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly16x4x2_t [[TMP6]]
poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) {
- // CHECK-LABEL: test_vld1_p16_x2
return vld1_p16_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld1_p64_x2(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
+// CHECK: ret %struct.poly64x1x2_t [[TMP6]]
poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
- // CHECK-LABEL: test_vld1_p64_x2
return vld1_p64_x2(a);
- // CHECK: ld1 {{{ ?v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld1q_u8_x3(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
+// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint8x16x3_t [[TMP4]]
uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) {
- // CHECK-LABEL: test_vld1q_u8_x3
return vld1q_u8_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld1q_u16_x3(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint16x8x3_t [[TMP6]]
uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) {
- // CHECK-LABEL: test_vld1q_u16_x3
return vld1q_u16_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld1q_u32_x3(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint32x4x3_t [[TMP6]]
uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) {
- // CHECK-LABEL: test_vld1q_u32_x3
return vld1q_u32_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld1q_u64_x3(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
+// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.uint64x2x3_t [[TMP6]]
uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) {
- // CHECK-LABEL: test_vld1q_u64_x3
return vld1q_u64_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
+// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int8x16x3_t [[TMP4]]
int8x16x3_t test_vld1q_s8_x3(int8_t const *a) {
- // CHECK-LABEL: test_vld1q_s8_x3
return vld1q_s8_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int16x8x3_t [[TMP6]]
int16x8x3_t test_vld1q_s16_x3(int16_t const *a) {
- // CHECK-LABEL: test_vld1q_s16_x3
return vld1q_s16_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int32x4x3_t [[TMP6]]
int32x4x3_t test_vld1q_s32_x3(int32_t const *a) {
- // CHECK-LABEL: test_vld1q_s32_x3
return vld1q_s32_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
+// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.int64x2x3_t [[TMP6]]
int64x2x3_t test_vld1q_s64_x3(int64_t const *a) {
- // CHECK-LABEL: test_vld1q_s64_x3
return vld1q_s64_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float16x8x3_t @test_vld1q_f16_x3(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float16x8x3_t [[TMP6]]
float16x8x3_t test_vld1q_f16_x3(float16_t const *a) {
- // CHECK-LABEL: test_vld1q_f16_x3
return vld1q_f16_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
+// CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
+// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float32x4x3_t [[TMP6]]
float32x4x3_t test_vld1q_f32_x3(float32_t const *a) {
- // CHECK-LABEL: test_vld1q_f32_x3
return vld1q_f32_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
+// CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
+// CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.float64x2x3_t [[TMP6]]
float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
- // CHECK-LABEL: test_vld1q_f64_x3
return vld1q_f64_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld1q_p8_x3(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
+// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
+// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly8x16x3_t [[TMP4]]
poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) {
- // CHECK-LABEL: test_vld1q_p8_x3
return vld1q_p8_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld1q_p16_x3(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly16x8x3_t [[TMP6]]
poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) {
- // CHECK-LABEL: test_vld1q_p16_x3
return vld1q_p16_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld1q_p64_x3(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
+// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
+// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
+// CHECK: ret %struct.poly64x2x3_t [[TMP6]]
poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
- // CHECK-LABEL: test_vld1q_p64_x3
return vld1q_p64_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld1_u8_x3(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
+// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint8x8x3_t [[TMP4]]
uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) {
- // CHECK-LABEL: test_vld1_u8_x3
return vld1_u8_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld1_u16_x3(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint16x4x3_t [[TMP6]]
uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) {
- // CHECK-LABEL: test_vld1_u16_x3
return vld1_u16_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld1_u32_x3(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint32x2x3_t [[TMP6]]
uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) {
- // CHECK-LABEL: test_vld1_u32_x3
return vld1_u32_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld1_u64_x3(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.uint64x1x3_t [[TMP6]]
uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) {
- // CHECK-LABEL: test_vld1_u64_x3
return vld1_u64_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
+// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a)
+// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
+// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
+// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int8x8x3_t [[TMP4]]
int8x8x3_t test_vld1_s8_x3(int8_t const *a) {
- // CHECK-LABEL: test_vld1_s8_x3
return vld1_s8_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int16x4x3_t [[TMP6]]
int16x4x3_t test_vld1_s16_x3(int16_t const *a) {
- // CHECK-LABEL: test_vld1_s16_x3
return vld1_s16_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int32x2x3_t [[TMP6]]
int32x2x3_t test_vld1_s32_x3(int32_t const *a) {
- // CHECK-LABEL: test_vld1_s32_x3
return vld1_s32_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
+// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.int64x1x3_t [[TMP6]]
int64x1x3_t test_vld1_s64_x3(int64_t const *a) {
- // CHECK-LABEL: test_vld1_s64_x3
return vld1_s64_x3(a);
- // CHECK: ld1 {{{ ?v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d ?}}}, [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define %struct.float16x4x3_t @test_vld1_f16_x3(half* %a) #0 {
+// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
+// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
+// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
+// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
+// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
+// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
+// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
+// CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
+// CHECK: ret %struct.float16x4x3_t [[TMP6]]
float16x4x3_t test_vld1_f16_x3(float16_t const *a) {
- // CHECK-LABEL: test_vld1_f16_x3
return vld1_f16_x3(a);
[... 9138 lines stripped ...]
More information about the cfe-commits
mailing list