[llvm] 3001617 - [AArch64] Add tests for scalar_to_vector(load) and extend load into zero tests. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 11 01:34:19 PDT 2024
Author: David Green
Date: 2024-09-11T09:34:14+01:00
New Revision: 300161761df54f5f85630a8ad0e170d09d119ee3
URL: https://github.com/llvm/llvm-project/commit/300161761df54f5f85630a8ad0e170d09d119ee3
DIFF: https://github.com/llvm/llvm-project/commit/300161761df54f5f85630a8ad0e170d09d119ee3.diff
LOG: [AArch64] Add tests for scalar_to_vector(load) and extend load into zero tests. NFC
Added:
llvm/test/CodeGen/AArch64/load-insert-undef.ll
Modified:
llvm/test/CodeGen/AArch64/load-insert-zero.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/load-insert-undef.ll b/llvm/test/CodeGen/AArch64/load-insert-undef.ll
new file mode 100644
index 00000000000000..1e776d1c06fcb3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/load-insert-undef.ll
@@ -0,0 +1,1098 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+bf16,+sve | FileCheck %s
+
+define <8 x i8> @loadv8i8(ptr %p) {
+; CHECK-LABEL: loadv8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0]
+; CHECK-NEXT: ret
+ %l = load i8, ptr %p
+ %v = insertelement <8 x i8> poison, i8 %l, i32 0
+ ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8(ptr %p) {
+; CHECK-LABEL: loadv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0]
+; CHECK-NEXT: ret
+ %l = load i8, ptr %p
+ %v = insertelement <16 x i8> poison, i8 %l, i32 0
+ ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16(ptr %p) {
+; CHECK-LABEL: loadv4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ret
+ %l = load i16, ptr %p
+ %v = insertelement <4 x i16> poison, i16 %l, i32 0
+ ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16(ptr %p) {
+; CHECK-LABEL: loadv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ret
+ %l = load i16, ptr %p
+ %v = insertelement <8 x i16> poison, i16 %l, i32 0
+ ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32(ptr %p) {
+; CHECK-LABEL: loadv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ret
+ %l = load i32, ptr %p
+ %v = insertelement <2 x i32> poison, i32 %l, i32 0
+ ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32(ptr %p) {
+; CHECK-LABEL: loadv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ret
+ %l = load i32, ptr %p
+ %v = insertelement <4 x i32> poison, i32 %l, i32 0
+ ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64(ptr %p) {
+; CHECK-LABEL: loadv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %l = load i64, ptr %p
+ %v = insertelement <2 x i64> poison, i64 %l, i32 0
+ ret <2 x i64> %v
+}
+
+
+define <4 x half> @loadv4f16(ptr %p) {
+; CHECK-LABEL: loadv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ret
+ %l = load half, ptr %p
+ %v = insertelement <4 x half> poison, half %l, i32 0
+ ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16(ptr %p) {
+; CHECK-LABEL: loadv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ret
+ %l = load half, ptr %p
+ %v = insertelement <8 x half> poison, half %l, i32 0
+ ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16(ptr %p) {
+; CHECK-LABEL: loadv4bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ret
+ %l = load bfloat, ptr %p
+ %v = insertelement <4 x bfloat> poison, bfloat %l, i32 0
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16(ptr %p) {
+; CHECK-LABEL: loadv8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ret
+ %l = load bfloat, ptr %p
+ %v = insertelement <8 x bfloat> poison, bfloat %l, i32 0
+ ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32(ptr %p) {
+; CHECK-LABEL: loadv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ret
+ %l = load float, ptr %p
+ %v = insertelement <2 x float> poison, float %l, i32 0
+ ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32(ptr %p) {
+; CHECK-LABEL: loadv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ret
+ %l = load float, ptr %p
+ %v = insertelement <4 x float> poison, float %l, i32 0
+ ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64(ptr %p) {
+; CHECK-LABEL: loadv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %l = load double, ptr %p
+ %v = insertelement <2 x double> poison, double %l, i32 0
+ ret <2 x double> %v
+}
+
+
+; Unscaled
+
+define <8 x i8> @loadv8i8_offset(ptr %p) {
+; CHECK-LABEL: loadv8i8_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load i8, ptr %g
+ %v = insertelement <8 x i8> poison, i8 %l, i32 0
+ ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8_offset(ptr %p) {
+; CHECK-LABEL: loadv16i8_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load i8, ptr %g
+ %v = insertelement <16 x i8> poison, i8 %l, i32 0
+ ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16_offset(ptr %p) {
+; CHECK-LABEL: loadv4i16_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldurh w8, [x0, #1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load i16, ptr %g
+ %v = insertelement <4 x i16> poison, i16 %l, i32 0
+ ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16_offset(ptr %p) {
+; CHECK-LABEL: loadv8i16_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldurh w8, [x0, #1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load i16, ptr %g
+ %v = insertelement <8 x i16> poison, i16 %l, i32 0
+ ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32_offset(ptr %p) {
+; CHECK-LABEL: loadv2i32_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur w8, [x0, #1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load i32, ptr %g
+ %v = insertelement <2 x i32> poison, i32 %l, i32 0
+ ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32_offset(ptr %p) {
+; CHECK-LABEL: loadv4i32_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur w8, [x0, #1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load i32, ptr %g
+ %v = insertelement <4 x i32> poison, i32 %l, i32 0
+ ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64_offset(ptr %p) {
+; CHECK-LABEL: loadv2i64_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur x8, [x0, #1]
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load i64, ptr %g
+ %v = insertelement <2 x i64> poison, i64 %l, i32 0
+ ret <2 x i64> %v
+}
+
+
+define <4 x half> @loadv4f16_offset(ptr %p) {
+; CHECK-LABEL: loadv4f16_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load half, ptr %g
+ %v = insertelement <4 x half> poison, half %l, i32 0
+ ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16_offset(ptr %p) {
+; CHECK-LABEL: loadv8f16_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load half, ptr %g
+ %v = insertelement <8 x half> poison, half %l, i32 0
+ ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16_offset(ptr %p) {
+; CHECK-LABEL: loadv4bf16_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load bfloat, ptr %g
+ %v = insertelement <4 x bfloat> poison, bfloat %l, i32 0
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16_offset(ptr %p) {
+; CHECK-LABEL: loadv8bf16_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load bfloat, ptr %g
+ %v = insertelement <8 x bfloat> poison, bfloat %l, i32 0
+ ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32_offset(ptr %p) {
+; CHECK-LABEL: loadv2f32_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur s0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load float, ptr %g
+ %v = insertelement <2 x float> poison, float %l, i32 0
+ ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32_offset(ptr %p) {
+; CHECK-LABEL: loadv4f32_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur s0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load float, ptr %g
+ %v = insertelement <4 x float> poison, float %l, i32 0
+ ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64_offset(ptr %p) {
+; CHECK-LABEL: loadv2f64_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur d0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load double, ptr %g
+ %v = insertelement <2 x double> poison, double %l, i32 0
+ ret <2 x double> %v
+}
+
+
+define <8 x i8> @loadv8i8_noffset(ptr %p) {
+; CHECK-LABEL: loadv8i8_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldurb w8, [x0, #-1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load i8, ptr %g
+ %v = insertelement <8 x i8> poison, i8 %l, i32 0
+ ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8_noffset(ptr %p) {
+; CHECK-LABEL: loadv16i8_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldurb w8, [x0, #-1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load i8, ptr %g
+ %v = insertelement <16 x i8> poison, i8 %l, i32 0
+ ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16_noffset(ptr %p) {
+; CHECK-LABEL: loadv4i16_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldurh w8, [x0, #-1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load i16, ptr %g
+ %v = insertelement <4 x i16> poison, i16 %l, i32 0
+ ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16_noffset(ptr %p) {
+; CHECK-LABEL: loadv8i16_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldurh w8, [x0, #-1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load i16, ptr %g
+ %v = insertelement <8 x i16> poison, i16 %l, i32 0
+ ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32_noffset(ptr %p) {
+; CHECK-LABEL: loadv2i32_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur w8, [x0, #-1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load i32, ptr %g
+ %v = insertelement <2 x i32> poison, i32 %l, i32 0
+ ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32_noffset(ptr %p) {
+; CHECK-LABEL: loadv4i32_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur w8, [x0, #-1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load i32, ptr %g
+ %v = insertelement <4 x i32> poison, i32 %l, i32 0
+ ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64_noffset(ptr %p) {
+; CHECK-LABEL: loadv2i64_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur x8, [x0, #-1]
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load i64, ptr %g
+ %v = insertelement <2 x i64> poison, i64 %l, i32 0
+ ret <2 x i64> %v
+}
+
+
+define <4 x half> @loadv4f16_noffset(ptr %p) {
+; CHECK-LABEL: loadv4f16_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load half, ptr %g
+ %v = insertelement <4 x half> poison, half %l, i32 0
+ ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16_noffset(ptr %p) {
+; CHECK-LABEL: loadv8f16_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load half, ptr %g
+ %v = insertelement <8 x half> poison, half %l, i32 0
+ ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16_noffset(ptr %p) {
+; CHECK-LABEL: loadv4bf16_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load bfloat, ptr %g
+ %v = insertelement <4 x bfloat> poison, bfloat %l, i32 0
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16_noffset(ptr %p) {
+; CHECK-LABEL: loadv8bf16_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load bfloat, ptr %g
+ %v = insertelement <8 x bfloat> poison, bfloat %l, i32 0
+ ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32_noffset(ptr %p) {
+; CHECK-LABEL: loadv2f32_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur s0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load float, ptr %g
+ %v = insertelement <2 x float> poison, float %l, i32 0
+ ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32_noffset(ptr %p) {
+; CHECK-LABEL: loadv4f32_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur s0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load float, ptr %g
+ %v = insertelement <4 x float> poison, float %l, i32 0
+ ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64_noffset(ptr %p) {
+; CHECK-LABEL: loadv2f64_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur d0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load double, ptr %g
+ %v = insertelement <2 x double> poison, double %l, i32 0
+ ret <2 x double> %v
+}
+
+
+; roW addressing modes
+
+define <8 x i8> @loadv8i8_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv8i8_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0, w1, sxtw]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i32 %o
+ %l = load i8, ptr %g
+ %v = insertelement <8 x i8> poison, i8 %l, i32 0
+ ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv16i8_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0, w1, sxtw]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i32 %o
+ %l = load i8, ptr %g
+ %v = insertelement <16 x i8> poison, i8 %l, i32 0
+ ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4i16_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i16, ptr %p, i32 %o
+ %l = load i16, ptr %g
+ %v = insertelement <4 x i16> poison, i16 %l, i32 0
+ ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv8i16_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i16, ptr %p, i32 %o
+ %l = load i16, ptr %g
+ %v = insertelement <8 x i16> poison, i16 %l, i32 0
+ ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv2i32_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0, w1, sxtw #2]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i32, ptr %p, i32 %o
+ %l = load i32, ptr %g
+ %v = insertelement <2 x i32> poison, i32 %l, i32 0
+ ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4i32_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0, w1, sxtw #2]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i32, ptr %p, i32 %o
+ %l = load i32, ptr %g
+ %v = insertelement <4 x i32> poison, i32 %l, i32 0
+ ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv2i64_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0, w1, sxtw #3]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i64, ptr %p, i32 %o
+ %l = load i64, ptr %g
+ %v = insertelement <2 x i64> poison, i64 %l, i32 0
+ ret <2 x i64> %v
+}
+
+define <4 x half> @loadv4f16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4f16_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds half, ptr %p, i32 %o
+ %l = load half, ptr %g
+ %v = insertelement <4 x half> poison, half %l, i32 0
+ ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv8f16_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds half, ptr %p, i32 %o
+ %l = load half, ptr %g
+ %v = insertelement <8 x half> poison, half %l, i32 0
+ ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4bf16_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds bfloat, ptr %p, i32 %o
+ %l = load bfloat, ptr %g
+ %v = insertelement <4 x bfloat> poison, bfloat %l, i32 0
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv8bf16_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds bfloat, ptr %p, i32 %o
+ %l = load bfloat, ptr %g
+ %v = insertelement <8 x bfloat> poison, bfloat %l, i32 0
+ ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv2f32_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0, w1, sxtw #2]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds float, ptr %p, i32 %o
+ %l = load float, ptr %g
+ %v = insertelement <2 x float> poison, float %l, i32 0
+ ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4f32_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0, w1, sxtw #2]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds float, ptr %p, i32 %o
+ %l = load float, ptr %g
+ %v = insertelement <4 x float> poison, float %l, i32 0
+ ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv2f64_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0, w1, sxtw #3]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds double, ptr %p, i32 %o
+ %l = load double, ptr %g
+ %v = insertelement <2 x double> poison, double %l, i32 0
+ ret <2 x double> %v
+}
+
+; roX
+
+define <8 x i8> @loadv8i8_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv8i8_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0, x1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 %o
+ %l = load i8, ptr %g
+ %v = insertelement <8 x i8> poison, i8 %l, i32 0
+ ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv16i8_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0, x1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 %o
+ %l = load i8, ptr %g
+ %v = insertelement <16 x i8> poison, i8 %l, i32 0
+ ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4i16_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i16, ptr %p, i64 %o
+ %l = load i16, ptr %g
+ %v = insertelement <4 x i16> poison, i16 %l, i32 0
+ ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv8i16_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i16, ptr %p, i64 %o
+ %l = load i16, ptr %g
+ %v = insertelement <8 x i16> poison, i16 %l, i32 0
+ ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv2i32_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i32, ptr %p, i64 %o
+ %l = load i32, ptr %g
+ %v = insertelement <2 x i32> poison, i32 %l, i32 0
+ ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4i32_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i32, ptr %p, i64 %o
+ %l = load i32, ptr %g
+ %v = insertelement <4 x i32> poison, i32 %l, i32 0
+ ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv2i64_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i64, ptr %p, i64 %o
+ %l = load i64, ptr %g
+ %v = insertelement <2 x i64> poison, i64 %l, i32 0
+ ret <2 x i64> %v
+}
+
+define <4 x half> @loadv4f16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4f16_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds half, ptr %p, i64 %o
+ %l = load half, ptr %g
+ %v = insertelement <4 x half> poison, half %l, i32 0
+ ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv8f16_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds half, ptr %p, i64 %o
+ %l = load half, ptr %g
+ %v = insertelement <8 x half> poison, half %l, i32 0
+ ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4bf16_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds bfloat, ptr %p, i64 %o
+ %l = load bfloat, ptr %g
+ %v = insertelement <4 x bfloat> poison, bfloat %l, i32 0
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv8bf16_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds bfloat, ptr %p, i64 %o
+ %l = load bfloat, ptr %g
+ %v = insertelement <8 x bfloat> poison, bfloat %l, i32 0
+ ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv2f32_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds float, ptr %p, i64 %o
+ %l = load float, ptr %g
+ %v = insertelement <2 x float> poison, float %l, i32 0
+ ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4f32_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds float, ptr %p, i64 %o
+ %l = load float, ptr %g
+ %v = insertelement <4 x float> poison, float %l, i32 0
+ ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv2f64_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds double, ptr %p, i64 %o
+ %l = load double, ptr %g
+ %v = insertelement <2 x double> poison, double %l, i32 0
+ ret <2 x double> %v
+}
+
+
+; SVE
+
+define <vscale x 8 x i8> @loadnxv8i8(ptr %p) {
+; CHECK-LABEL: loadnxv8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrb w8, [x0]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %l = load i8, ptr %p
+ %v = insertelement <vscale x 8 x i8> poison, i8 %l, i32 0
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 16 x i8> @loadnxv16i8(ptr %p) {
+; CHECK-LABEL: loadnxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrb w8, [x0]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %l = load i8, ptr %p
+ %v = insertelement <vscale x 16 x i8> poison, i8 %l, i32 0
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 4 x i16> @loadnxv4i16(ptr %p) {
+; CHECK-LABEL: loadnxv4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrh w8, [x0]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %l = load i16, ptr %p
+ %v = insertelement <vscale x 4 x i16> poison, i16 %l, i32 0
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 8 x i16> @loadnxv8i16(ptr %p) {
+; CHECK-LABEL: loadnxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrh w8, [x0]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %l = load i16, ptr %p
+ %v = insertelement <vscale x 8 x i16> poison, i16 %l, i32 0
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 2 x i32> @loadnxv2i32(ptr %p) {
+; CHECK-LABEL: loadnxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %l = load i32, ptr %p
+ %v = insertelement <vscale x 2 x i32> poison, i32 %l, i32 0
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 4 x i32> @loadnxv4i32(ptr %p) {
+; CHECK-LABEL: loadnxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %l = load i32, ptr %p
+ %v = insertelement <vscale x 4 x i32> poison, i32 %l, i32 0
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 2 x i64> @loadnxv2i64(ptr %p) {
+; CHECK-LABEL: loadnxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %l = load i64, ptr %p
+ %v = insertelement <vscale x 2 x i64> poison, i64 %l, i32 0
+ ret <vscale x 2 x i64> %v
+}
+
+
+define <vscale x 4 x half> @loadnxv4f16(ptr %p) {
+; CHECK-LABEL: loadnxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ret
+ %l = load half, ptr %p
+ %v = insertelement <vscale x 4 x half> poison, half %l, i32 0
+ ret <vscale x 4 x half> %v
+}
+
+define <vscale x 8 x half> @loadnxv8f16(ptr %p) {
+; CHECK-LABEL: loadnxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ret
+ %l = load half, ptr %p
+ %v = insertelement <vscale x 8 x half> poison, half %l, i32 0
+ ret <vscale x 8 x half> %v
+}
+
+define <vscale x 4 x bfloat> @loadnxv4bf16(ptr %p) {
+; CHECK-LABEL: loadnxv4bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ret
+ %l = load bfloat, ptr %p
+ %v = insertelement <vscale x 4 x bfloat> poison, bfloat %l, i32 0
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @loadnxv8bf16(ptr %p) {
+; CHECK-LABEL: loadnxv8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ret
+ %l = load bfloat, ptr %p
+ %v = insertelement <vscale x 8 x bfloat> poison, bfloat %l, i32 0
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 2 x float> @loadnxv2f32(ptr %p) {
+; CHECK-LABEL: loadnxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ret
+ %l = load float, ptr %p
+ %v = insertelement <vscale x 2 x float> poison, float %l, i32 0
+ ret <vscale x 2 x float> %v
+}
+
+define <vscale x 4 x float> @loadnxv4f32(ptr %p) {
+; CHECK-LABEL: loadnxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ret
+ %l = load float, ptr %p
+ %v = insertelement <vscale x 4 x float> poison, float %l, i32 0
+ ret <vscale x 4 x float> %v
+}
+
+define <vscale x 2 x double> @loadnxv2f64(ptr %p) {
+; CHECK-LABEL: loadnxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %l = load double, ptr %p
+ %v = insertelement <vscale x 2 x double> poison, double %l, i32 0
+ ret <vscale x 2 x double> %v
+}
+
+
+; Unscaled
+
+define <vscale x 8 x i8> @loadnxv8i8_offset(ptr %p) {
+; CHECK-LABEL: loadnxv8i8_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrb w8, [x0, #1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load i8, ptr %g
+ %v = insertelement <vscale x 8 x i8> poison, i8 %l, i32 0
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 16 x i8> @loadnxv16i8_offset(ptr %p) {
+; CHECK-LABEL: loadnxv16i8_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrb w8, [x0, #1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load i8, ptr %g
+ %v = insertelement <vscale x 16 x i8> poison, i8 %l, i32 0
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 4 x i16> @loadnxv4i16_offset(ptr %p) {
+; CHECK-LABEL: loadnxv4i16_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldurh w8, [x0, #1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load i16, ptr %g
+ %v = insertelement <vscale x 4 x i16> poison, i16 %l, i32 0
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 8 x i16> @loadnxv8i16_offset(ptr %p) {
+; CHECK-LABEL: loadnxv8i16_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldurh w8, [x0, #1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load i16, ptr %g
+ %v = insertelement <vscale x 8 x i16> poison, i16 %l, i32 0
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 2 x i32> @loadnxv2i32_offset(ptr %p) {
+; CHECK-LABEL: loadnxv2i32_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur w8, [x0, #1]
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load i32, ptr %g
+ %v = insertelement <vscale x 2 x i32> poison, i32 %l, i32 0
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 4 x i32> @loadnxv4i32_offset(ptr %p) {
+; CHECK-LABEL: loadnxv4i32_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur w8, [x0, #1]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load i32, ptr %g
+ %v = insertelement <vscale x 4 x i32> poison, i32 %l, i32 0
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 2 x i64> @loadnxv2i64_offset(ptr %p) {
+; CHECK-LABEL: loadnxv2i64_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur x8, [x0, #1]
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load i64, ptr %g
+ %v = insertelement <vscale x 2 x i64> poison, i64 %l, i32 0
+ ret <vscale x 2 x i64> %v
+}
+
+
+define <vscale x 4 x half> @loadnxv4f16_offset(ptr %p) {
+; CHECK-LABEL: loadnxv4f16_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load half, ptr %g
+ %v = insertelement <vscale x 4 x half> poison, half %l, i32 0
+ ret <vscale x 4 x half> %v
+}
+
+define <vscale x 8 x half> @loadnxv8f16_offset(ptr %p) {
+; CHECK-LABEL: loadnxv8f16_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load half, ptr %g
+ %v = insertelement <vscale x 8 x half> poison, half %l, i32 0
+ ret <vscale x 8 x half> %v
+}
+
+define <vscale x 4 x bfloat> @loadnxv4bf16_offset(ptr %p) {
+; CHECK-LABEL: loadnxv4bf16_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load bfloat, ptr %g
+ %v = insertelement <vscale x 4 x bfloat> poison, bfloat %l, i32 0
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @loadnxv8bf16_offset(ptr %p) {
+; CHECK-LABEL: loadnxv8bf16_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load bfloat, ptr %g
+ %v = insertelement <vscale x 8 x bfloat> poison, bfloat %l, i32 0
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 2 x float> @loadnxv2f32_offset(ptr %p) {
+; CHECK-LABEL: loadnxv2f32_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur s0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load float, ptr %g
+ %v = insertelement <vscale x 2 x float> poison, float %l, i32 0
+ ret <vscale x 2 x float> %v
+}
+
+define <vscale x 4 x float> @loadnxv4f32_offset(ptr %p) {
+; CHECK-LABEL: loadnxv4f32_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur s0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load float, ptr %g
+ %v = insertelement <vscale x 4 x float> poison, float %l, i32 0
+ ret <vscale x 4 x float> %v
+}
+
+define <vscale x 2 x double> @loadnxv2f64_offset(ptr %p) {
+; CHECK-LABEL: loadnxv2f64_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur d0, [x0, #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 1
+ %l = load double, ptr %g
+ %v = insertelement <vscale x 2 x double> poison, double %l, i32 0
+ ret <vscale x 2 x double> %v
+}
diff --git a/llvm/test/CodeGen/AArch64/load-insert-zero.ll b/llvm/test/CodeGen/AArch64/load-insert-zero.ll
index 23d545459295fc..ccbd6f03fbcc36 100644
--- a/llvm/test/CodeGen/AArch64/load-insert-zero.ll
+++ b/llvm/test/CodeGen/AArch64/load-insert-zero.ll
@@ -378,7 +378,6 @@ define <2 x i64> @loadv2i64_noffset(ptr %p) {
ret <2 x i64> %v
}
-
define <4 x half> @loadv4f16_noffset(ptr %p) {
; CHECK-LABEL: loadv4f16_noffset:
; CHECK: // %bb.0:
@@ -457,6 +456,328 @@ define <2 x double> @loadv2f64_noffset(ptr %p) {
}
+; roW addressing modes
+
+define <8 x i8> @loadv8i8_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv8i8_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0, w1, sxtw]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i32 %o
+ %l = load i8, ptr %g
+ %v = insertelement <8 x i8> zeroinitializer, i8 %l, i32 0
+ ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv16i8_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0, w1, sxtw]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i32 %o
+ %l = load i8, ptr %g
+ %v = insertelement <16 x i8> zeroinitializer, i8 %l, i32 0
+ ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4i16_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i16, ptr %p, i32 %o
+ %l = load i16, ptr %g
+ %v = insertelement <4 x i16> zeroinitializer, i16 %l, i32 0
+ ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv8i16_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i16, ptr %p, i32 %o
+ %l = load i16, ptr %g
+ %v = insertelement <8 x i16> zeroinitializer, i16 %l, i32 0
+ ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv2i32_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, w1, sxtw #2
+; CHECK-NEXT: ldr s0, [x8]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i32, ptr %p, i32 %o
+ %l = load i32, ptr %g
+ %v = insertelement <2 x i32> zeroinitializer, i32 %l, i32 0
+ ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4i32_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, w1, sxtw #2
+; CHECK-NEXT: ldr s0, [x8]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i32, ptr %p, i32 %o
+ %l = load i32, ptr %g
+ %v = insertelement <4 x i32> zeroinitializer, i32 %l, i32 0
+ ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv2i64_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0, w1, sxtw #3]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i64, ptr %p, i32 %o
+ %l = load i64, ptr %g
+ %v = insertelement <2 x i64> zeroinitializer, i64 %l, i32 0
+ ret <2 x i64> %v
+}
+
+define <4 x half> @loadv4f16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4f16_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds half, ptr %p, i32 %o
+ %l = load half, ptr %g
+ %v = insertelement <4 x half> zeroinitializer, half %l, i32 0
+ ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv8f16_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds half, ptr %p, i32 %o
+ %l = load half, ptr %g
+ %v = insertelement <8 x half> zeroinitializer, half %l, i32 0
+ ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4bf16_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds bfloat, ptr %p, i32 %o
+ %l = load bfloat, ptr %g
+ %v = insertelement <4 x bfloat> zeroinitializer, bfloat %l, i32 0
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv8bf16_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds bfloat, ptr %p, i32 %o
+ %l = load bfloat, ptr %g
+ %v = insertelement <8 x bfloat> zeroinitializer, bfloat %l, i32 0
+ ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv2f32_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, w1, sxtw #2
+; CHECK-NEXT: ldr s0, [x8]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds float, ptr %p, i32 %o
+ %l = load float, ptr %g
+ %v = insertelement <2 x float> zeroinitializer, float %l, i32 0
+ ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4f32_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, w1, sxtw #2
+; CHECK-NEXT: ldr s0, [x8]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds float, ptr %p, i32 %o
+ %l = load float, ptr %g
+ %v = insertelement <4 x float> zeroinitializer, float %l, i32 0
+ ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv2f64_roW:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0, w1, sxtw #3]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds double, ptr %p, i32 %o
+ %l = load double, ptr %g
+ %v = insertelement <2 x double> zeroinitializer, double %l, i32 0
+ ret <2 x double> %v
+}
+
+
+; roX addressing modes
+
+define <8 x i8> @loadv8i8_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv8i8_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0, x1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 %o
+ %l = load i8, ptr %g
+ %v = insertelement <8 x i8> zeroinitializer, i8 %l, i32 0
+ ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv16i8_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0, x1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 %o
+ %l = load i8, ptr %g
+ %v = insertelement <16 x i8> zeroinitializer, i8 %l, i32 0
+ ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4i16_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i16, ptr %p, i64 %o
+ %l = load i16, ptr %g
+ %v = insertelement <4 x i16> zeroinitializer, i16 %l, i32 0
+ ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv8i16_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i16, ptr %p, i64 %o
+ %l = load i16, ptr %g
+ %v = insertelement <8 x i16> zeroinitializer, i16 %l, i32 0
+ ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv2i32_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, x1, lsl #2
+; CHECK-NEXT: ldr s0, [x8]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i32, ptr %p, i64 %o
+ %l = load i32, ptr %g
+ %v = insertelement <2 x i32> zeroinitializer, i32 %l, i32 0
+ ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4i32_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, x1, lsl #2
+; CHECK-NEXT: ldr s0, [x8]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i32, ptr %p, i64 %o
+ %l = load i32, ptr %g
+ %v = insertelement <4 x i32> zeroinitializer, i32 %l, i32 0
+ ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv2i64_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i64, ptr %p, i64 %o
+ %l = load i64, ptr %g
+ %v = insertelement <2 x i64> zeroinitializer, i64 %l, i32 0
+ ret <2 x i64> %v
+}
+
+define <4 x half> @loadv4f16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4f16_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds half, ptr %p, i64 %o
+ %l = load half, ptr %g
+ %v = insertelement <4 x half> zeroinitializer, half %l, i32 0
+ ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv8f16_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds half, ptr %p, i64 %o
+ %l = load half, ptr %g
+ %v = insertelement <8 x half> zeroinitializer, half %l, i32 0
+ ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4bf16_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds bfloat, ptr %p, i64 %o
+ %l = load bfloat, ptr %g
+ %v = insertelement <4 x bfloat> zeroinitializer, bfloat %l, i32 0
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv8bf16_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds bfloat, ptr %p, i64 %o
+ %l = load bfloat, ptr %g
+ %v = insertelement <8 x bfloat> zeroinitializer, bfloat %l, i32 0
+ ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv2f32_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, x1, lsl #2
+; CHECK-NEXT: ldr s0, [x8]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds float, ptr %p, i64 %o
+ %l = load float, ptr %g
+ %v = insertelement <2 x float> zeroinitializer, float %l, i32 0
+ ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4f32_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, x1, lsl #2
+; CHECK-NEXT: ldr s0, [x8]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds float, ptr %p, i64 %o
+ %l = load float, ptr %g
+ %v = insertelement <4 x float> zeroinitializer, float %l, i32 0
+ ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv2f64_roX:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds double, ptr %p, i64 %o
+ %l = load double, ptr %g
+ %v = insertelement <2 x double> zeroinitializer, double %l, i32 0
+ ret <2 x double> %v
+}
+
+
define void @predictor_4x4_neon(ptr nocapture noundef writeonly %0, i64 noundef %1, ptr nocapture noundef readonly %2, ptr nocapture noundef readnone %3) {
; CHECK-LABEL: predictor_4x4_neon:
; CHECK: // %bb.0:
More information about the llvm-commits mailing list