[llvm] 3001617 - [AArch64] Add tests for scalar_to_vector(load) and extend load into zero tests. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 11 01:34:19 PDT 2024


Author: David Green
Date: 2024-09-11T09:34:14+01:00
New Revision: 300161761df54f5f85630a8ad0e170d09d119ee3

URL: https://github.com/llvm/llvm-project/commit/300161761df54f5f85630a8ad0e170d09d119ee3
DIFF: https://github.com/llvm/llvm-project/commit/300161761df54f5f85630a8ad0e170d09d119ee3.diff

LOG: [AArch64] Add tests for scalar_to_vector(load) and extend load into zero tests. NFC

Added: 
    llvm/test/CodeGen/AArch64/load-insert-undef.ll

Modified: 
    llvm/test/CodeGen/AArch64/load-insert-zero.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/load-insert-undef.ll b/llvm/test/CodeGen/AArch64/load-insert-undef.ll
new file mode 100644
index 00000000000000..1e776d1c06fcb3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/load-insert-undef.ll
@@ -0,0 +1,1098 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+bf16,+sve | FileCheck %s
+
+define <8 x i8> @loadv8i8(ptr %p) {
+; CHECK-LABEL: loadv8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr b0, [x0]
+; CHECK-NEXT:    ret
+  %l = load i8, ptr %p
+  %v = insertelement <8 x i8> poison, i8 %l, i32 0
+  ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8(ptr %p) {
+; CHECK-LABEL: loadv16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr b0, [x0]
+; CHECK-NEXT:    ret
+  %l = load i8, ptr %p
+  %v = insertelement <16 x i8> poison, i8 %l, i32 0
+  ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16(ptr %p) {
+; CHECK-LABEL: loadv4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    ret
+  %l = load i16, ptr %p
+  %v = insertelement <4 x i16> poison, i16 %l, i32 0
+  ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16(ptr %p) {
+; CHECK-LABEL: loadv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    ret
+  %l = load i16, ptr %p
+  %v = insertelement <8 x i16> poison, i16 %l, i32 0
+  ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32(ptr %p) {
+; CHECK-LABEL: loadv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    ret
+  %l = load i32, ptr %p
+  %v = insertelement <2 x i32> poison, i32 %l, i32 0
+  ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32(ptr %p) {
+; CHECK-LABEL: loadv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    ret
+  %l = load i32, ptr %p
+  %v = insertelement <4 x i32> poison, i32 %l, i32 0
+  ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64(ptr %p) {
+; CHECK-LABEL: loadv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %l = load i64, ptr %p
+  %v = insertelement <2 x i64> poison, i64 %l, i32 0
+  ret <2 x i64> %v
+}
+
+
+define <4 x half> @loadv4f16(ptr %p) {
+; CHECK-LABEL: loadv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    ret
+  %l = load half, ptr %p
+  %v = insertelement <4 x half> poison, half %l, i32 0
+  ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16(ptr %p) {
+; CHECK-LABEL: loadv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    ret
+  %l = load half, ptr %p
+  %v = insertelement <8 x half> poison, half %l, i32 0
+  ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16(ptr %p) {
+; CHECK-LABEL: loadv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    ret
+  %l = load bfloat, ptr %p
+  %v = insertelement <4 x bfloat> poison, bfloat %l, i32 0
+  ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16(ptr %p) {
+; CHECK-LABEL: loadv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    ret
+  %l = load bfloat, ptr %p
+  %v = insertelement <8 x bfloat> poison, bfloat %l, i32 0
+  ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32(ptr %p) {
+; CHECK-LABEL: loadv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    ret
+  %l = load float, ptr %p
+  %v = insertelement <2 x float> poison, float %l, i32 0
+  ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32(ptr %p) {
+; CHECK-LABEL: loadv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    ret
+  %l = load float, ptr %p
+  %v = insertelement <4 x float> poison, float %l, i32 0
+  ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64(ptr %p) {
+; CHECK-LABEL: loadv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %l = load double, ptr %p
+  %v = insertelement <2 x double> poison, double %l, i32 0
+  ret <2 x double> %v
+}
+
+
+; Unscaled
+
+define <8 x i8> @loadv8i8_offset(ptr %p) {
+; CHECK-LABEL: loadv8i8_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr b0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load i8, ptr %g
+  %v = insertelement <8 x i8> poison, i8 %l, i32 0
+  ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8_offset(ptr %p) {
+; CHECK-LABEL: loadv16i8_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr b0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load i8, ptr %g
+  %v = insertelement <16 x i8> poison, i8 %l, i32 0
+  ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16_offset(ptr %p) {
+; CHECK-LABEL: loadv4i16_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldurh w8, [x0, #1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load i16, ptr %g
+  %v = insertelement <4 x i16> poison, i16 %l, i32 0
+  ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16_offset(ptr %p) {
+; CHECK-LABEL: loadv8i16_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldurh w8, [x0, #1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load i16, ptr %g
+  %v = insertelement <8 x i16> poison, i16 %l, i32 0
+  ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32_offset(ptr %p) {
+; CHECK-LABEL: loadv2i32_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur w8, [x0, #1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load i32, ptr %g
+  %v = insertelement <2 x i32> poison, i32 %l, i32 0
+  ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32_offset(ptr %p) {
+; CHECK-LABEL: loadv4i32_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur w8, [x0, #1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load i32, ptr %g
+  %v = insertelement <4 x i32> poison, i32 %l, i32 0
+  ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64_offset(ptr %p) {
+; CHECK-LABEL: loadv2i64_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur x8, [x0, #1]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load i64, ptr %g
+  %v = insertelement <2 x i64> poison, i64 %l, i32 0
+  ret <2 x i64> %v
+}
+
+
+define <4 x half> @loadv4f16_offset(ptr %p) {
+; CHECK-LABEL: loadv4f16_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load half, ptr %g
+  %v = insertelement <4 x half> poison, half %l, i32 0
+  ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16_offset(ptr %p) {
+; CHECK-LABEL: loadv8f16_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load half, ptr %g
+  %v = insertelement <8 x half> poison, half %l, i32 0
+  ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16_offset(ptr %p) {
+; CHECK-LABEL: loadv4bf16_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load bfloat, ptr %g
+  %v = insertelement <4 x bfloat> poison, bfloat %l, i32 0
+  ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16_offset(ptr %p) {
+; CHECK-LABEL: loadv8bf16_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load bfloat, ptr %g
+  %v = insertelement <8 x bfloat> poison, bfloat %l, i32 0
+  ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32_offset(ptr %p) {
+; CHECK-LABEL: loadv2f32_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur s0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load float, ptr %g
+  %v = insertelement <2 x float> poison, float %l, i32 0
+  ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32_offset(ptr %p) {
+; CHECK-LABEL: loadv4f32_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur s0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load float, ptr %g
+  %v = insertelement <4 x float> poison, float %l, i32 0
+  ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64_offset(ptr %p) {
+; CHECK-LABEL: loadv2f64_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur d0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load double, ptr %g
+  %v = insertelement <2 x double> poison, double %l, i32 0
+  ret <2 x double> %v
+}
+
+
+define <8 x i8> @loadv8i8_noffset(ptr %p) {
+; CHECK-LABEL: loadv8i8_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldurb w8, [x0, #-1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load i8, ptr %g
+  %v = insertelement <8 x i8> poison, i8 %l, i32 0
+  ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8_noffset(ptr %p) {
+; CHECK-LABEL: loadv16i8_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldurb w8, [x0, #-1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load i8, ptr %g
+  %v = insertelement <16 x i8> poison, i8 %l, i32 0
+  ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16_noffset(ptr %p) {
+; CHECK-LABEL: loadv4i16_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldurh w8, [x0, #-1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load i16, ptr %g
+  %v = insertelement <4 x i16> poison, i16 %l, i32 0
+  ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16_noffset(ptr %p) {
+; CHECK-LABEL: loadv8i16_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldurh w8, [x0, #-1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load i16, ptr %g
+  %v = insertelement <8 x i16> poison, i16 %l, i32 0
+  ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32_noffset(ptr %p) {
+; CHECK-LABEL: loadv2i32_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur w8, [x0, #-1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load i32, ptr %g
+  %v = insertelement <2 x i32> poison, i32 %l, i32 0
+  ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32_noffset(ptr %p) {
+; CHECK-LABEL: loadv4i32_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur w8, [x0, #-1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load i32, ptr %g
+  %v = insertelement <4 x i32> poison, i32 %l, i32 0
+  ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64_noffset(ptr %p) {
+; CHECK-LABEL: loadv2i64_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur x8, [x0, #-1]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load i64, ptr %g
+  %v = insertelement <2 x i64> poison, i64 %l, i32 0
+  ret <2 x i64> %v
+}
+
+
+define <4 x half> @loadv4f16_noffset(ptr %p) {
+; CHECK-LABEL: loadv4f16_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load half, ptr %g
+  %v = insertelement <4 x half> poison, half %l, i32 0
+  ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16_noffset(ptr %p) {
+; CHECK-LABEL: loadv8f16_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load half, ptr %g
+  %v = insertelement <8 x half> poison, half %l, i32 0
+  ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16_noffset(ptr %p) {
+; CHECK-LABEL: loadv4bf16_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load bfloat, ptr %g
+  %v = insertelement <4 x bfloat> poison, bfloat %l, i32 0
+  ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16_noffset(ptr %p) {
+; CHECK-LABEL: loadv8bf16_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load bfloat, ptr %g
+  %v = insertelement <8 x bfloat> poison, bfloat %l, i32 0
+  ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32_noffset(ptr %p) {
+; CHECK-LABEL: loadv2f32_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur s0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load float, ptr %g
+  %v = insertelement <2 x float> poison, float %l, i32 0
+  ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32_noffset(ptr %p) {
+; CHECK-LABEL: loadv4f32_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur s0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load float, ptr %g
+  %v = insertelement <4 x float> poison, float %l, i32 0
+  ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64_noffset(ptr %p) {
+; CHECK-LABEL: loadv2f64_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur d0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load double, ptr %g
+  %v = insertelement <2 x double> poison, double %l, i32 0
+  ret <2 x double> %v
+}
+
+
+; roW addressing modes
+
+define <8 x i8> @loadv8i8_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv8i8_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr b0, [x0, w1, sxtw]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i32 %o
+  %l = load i8, ptr %g
+  %v = insertelement <8 x i8> poison, i8 %l, i32 0
+  ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv16i8_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr b0, [x0, w1, sxtw]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i32 %o
+  %l = load i8, ptr %g
+  %v = insertelement <16 x i8> poison, i8 %l, i32 0
+  ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4i16_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i16, ptr %p, i32 %o
+  %l = load i16, ptr %g
+  %v = insertelement <4 x i16> poison, i16 %l, i32 0
+  ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv8i16_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i16, ptr %p, i32 %o
+  %l = load i16, ptr %g
+  %v = insertelement <8 x i16> poison, i16 %l, i32 0
+  ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv2i32_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0, w1, sxtw #2]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i32, ptr %p, i32 %o
+  %l = load i32, ptr %g
+  %v = insertelement <2 x i32> poison, i32 %l, i32 0
+  ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4i32_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0, w1, sxtw #2]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i32, ptr %p, i32 %o
+  %l = load i32, ptr %g
+  %v = insertelement <4 x i32> poison, i32 %l, i32 0
+  ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv2i64_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, w1, sxtw #3]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i64, ptr %p, i32 %o
+  %l = load i64, ptr %g
+  %v = insertelement <2 x i64> poison, i64 %l, i32 0
+  ret <2 x i64> %v
+}
+
+define <4 x half> @loadv4f16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4f16_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds half, ptr %p, i32 %o
+  %l = load half, ptr %g
+  %v = insertelement <4 x half> poison, half %l, i32 0
+  ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv8f16_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds half, ptr %p, i32 %o
+  %l = load half, ptr %g
+  %v = insertelement <8 x half> poison, half %l, i32 0
+  ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4bf16_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds bfloat, ptr %p, i32 %o
+  %l = load bfloat, ptr %g
+  %v = insertelement <4 x bfloat> poison, bfloat %l, i32 0
+  ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv8bf16_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds bfloat, ptr %p, i32 %o
+  %l = load bfloat, ptr %g
+  %v = insertelement <8 x bfloat> poison, bfloat %l, i32 0
+  ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv2f32_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0, w1, sxtw #2]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds float, ptr %p, i32 %o
+  %l = load float, ptr %g
+  %v = insertelement <2 x float> poison, float %l, i32 0
+  ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv4f32_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0, w1, sxtw #2]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds float, ptr %p, i32 %o
+  %l = load float, ptr %g
+  %v = insertelement <4 x float> poison, float %l, i32 0
+  ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64_roW(ptr %p, i32 %o) {
+; CHECK-LABEL: loadv2f64_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, w1, sxtw #3]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds double, ptr %p, i32 %o
+  %l = load double, ptr %g
+  %v = insertelement <2 x double> poison, double %l, i32 0
+  ret <2 x double> %v
+}
+
+; roX
+
+define <8 x i8> @loadv8i8_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv8i8_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr b0, [x0, x1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 %o
+  %l = load i8, ptr %g
+  %v = insertelement <8 x i8> poison, i8 %l, i32 0
+  ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv16i8_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr b0, [x0, x1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 %o
+  %l = load i8, ptr %g
+  %v = insertelement <16 x i8> poison, i8 %l, i32 0
+  ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4i16_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i16, ptr %p, i64 %o
+  %l = load i16, ptr %g
+  %v = insertelement <4 x i16> poison, i16 %l, i32 0
+  ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv8i16_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i16, ptr %p, i64 %o
+  %l = load i16, ptr %g
+  %v = insertelement <8 x i16> poison, i16 %l, i32 0
+  ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv2i32_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i32, ptr %p, i64 %o
+  %l = load i32, ptr %g
+  %v = insertelement <2 x i32> poison, i32 %l, i32 0
+  ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4i32_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i32, ptr %p, i64 %o
+  %l = load i32, ptr %g
+  %v = insertelement <4 x i32> poison, i32 %l, i32 0
+  ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv2i64_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i64, ptr %p, i64 %o
+  %l = load i64, ptr %g
+  %v = insertelement <2 x i64> poison, i64 %l, i32 0
+  ret <2 x i64> %v
+}
+
+define <4 x half> @loadv4f16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4f16_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds half, ptr %p, i64 %o
+  %l = load half, ptr %g
+  %v = insertelement <4 x half> poison, half %l, i32 0
+  ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv8f16_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds half, ptr %p, i64 %o
+  %l = load half, ptr %g
+  %v = insertelement <8 x half> poison, half %l, i32 0
+  ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4bf16_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds bfloat, ptr %p, i64 %o
+  %l = load bfloat, ptr %g
+  %v = insertelement <4 x bfloat> poison, bfloat %l, i32 0
+  ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv8bf16_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds bfloat, ptr %p, i64 %o
+  %l = load bfloat, ptr %g
+  %v = insertelement <8 x bfloat> poison, bfloat %l, i32 0
+  ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv2f32_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds float, ptr %p, i64 %o
+  %l = load float, ptr %g
+  %v = insertelement <2 x float> poison, float %l, i32 0
+  ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv4f32_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds float, ptr %p, i64 %o
+  %l = load float, ptr %g
+  %v = insertelement <4 x float> poison, float %l, i32 0
+  ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64_roX(ptr %p, i64 %o) {
+; CHECK-LABEL: loadv2f64_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds double, ptr %p, i64 %o
+  %l = load double, ptr %g
+  %v = insertelement <2 x double> poison, double %l, i32 0
+  ret <2 x double> %v
+}
+
+
+; SVE
+
+define <vscale x 8 x i8> @loadnxv8i8(ptr %p) {
+; CHECK-LABEL: loadnxv8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %l = load i8, ptr %p
+  %v = insertelement <vscale x 8 x i8> poison, i8 %l, i32 0
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 16 x i8> @loadnxv16i8(ptr %p) {
+; CHECK-LABEL: loadnxv16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %l = load i8, ptr %p
+  %v = insertelement <vscale x 16 x i8> poison, i8 %l, i32 0
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 4 x i16> @loadnxv4i16(ptr %p) {
+; CHECK-LABEL: loadnxv4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %l = load i16, ptr %p
+  %v = insertelement <vscale x 4 x i16> poison, i16 %l, i32 0
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 8 x i16> @loadnxv8i16(ptr %p) {
+; CHECK-LABEL: loadnxv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %l = load i16, ptr %p
+  %v = insertelement <vscale x 8 x i16> poison, i16 %l, i32 0
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 2 x i32> @loadnxv2i32(ptr %p) {
+; CHECK-LABEL: loadnxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr w8, [x0]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %l = load i32, ptr %p
+  %v = insertelement <vscale x 2 x i32> poison, i32 %l, i32 0
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 4 x i32> @loadnxv4i32(ptr %p) {
+; CHECK-LABEL: loadnxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr w8, [x0]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %l = load i32, ptr %p
+  %v = insertelement <vscale x 4 x i32> poison, i32 %l, i32 0
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 2 x i64> @loadnxv2i64(ptr %p) {
+; CHECK-LABEL: loadnxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %l = load i64, ptr %p
+  %v = insertelement <vscale x 2 x i64> poison, i64 %l, i32 0
+  ret <vscale x 2 x i64> %v
+}
+
+
+define <vscale x 4 x half> @loadnxv4f16(ptr %p) {
+; CHECK-LABEL: loadnxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    ret
+  %l = load half, ptr %p
+  %v = insertelement <vscale x 4 x half> poison, half %l, i32 0
+  ret <vscale x 4 x half> %v
+}
+
+define <vscale x 8 x half> @loadnxv8f16(ptr %p) {
+; CHECK-LABEL: loadnxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    ret
+  %l = load half, ptr %p
+  %v = insertelement <vscale x 8 x half> poison, half %l, i32 0
+  ret <vscale x 8 x half> %v
+}
+
+define <vscale x 4 x bfloat> @loadnxv4bf16(ptr %p) {
+; CHECK-LABEL: loadnxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    ret
+  %l = load bfloat, ptr %p
+  %v = insertelement <vscale x 4 x bfloat> poison, bfloat %l, i32 0
+  ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @loadnxv8bf16(ptr %p) {
+; CHECK-LABEL: loadnxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    ret
+  %l = load bfloat, ptr %p
+  %v = insertelement <vscale x 8 x bfloat> poison, bfloat %l, i32 0
+  ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 2 x float> @loadnxv2f32(ptr %p) {
+; CHECK-LABEL: loadnxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    ret
+  %l = load float, ptr %p
+  %v = insertelement <vscale x 2 x float> poison, float %l, i32 0
+  ret <vscale x 2 x float> %v
+}
+
+define <vscale x 4 x float> @loadnxv4f32(ptr %p) {
+; CHECK-LABEL: loadnxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    ret
+  %l = load float, ptr %p
+  %v = insertelement <vscale x 4 x float> poison, float %l, i32 0
+  ret <vscale x 4 x float> %v
+}
+
+define <vscale x 2 x double> @loadnxv2f64(ptr %p) {
+; CHECK-LABEL: loadnxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %l = load double, ptr %p
+  %v = insertelement <vscale x 2 x double> poison, double %l, i32 0
+  ret <vscale x 2 x double> %v
+}
+
+
+; Unscaled
+
+define <vscale x 8 x i8> @loadnxv8i8_offset(ptr %p) {
+; CHECK-LABEL: loadnxv8i8_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrb w8, [x0, #1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load i8, ptr %g
+  %v = insertelement <vscale x 8 x i8> poison, i8 %l, i32 0
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 16 x i8> @loadnxv16i8_offset(ptr %p) {
+; CHECK-LABEL: loadnxv16i8_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrb w8, [x0, #1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load i8, ptr %g
+  %v = insertelement <vscale x 16 x i8> poison, i8 %l, i32 0
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 4 x i16> @loadnxv4i16_offset(ptr %p) {
+; CHECK-LABEL: loadnxv4i16_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldurh w8, [x0, #1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load i16, ptr %g
+  %v = insertelement <vscale x 4 x i16> poison, i16 %l, i32 0
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 8 x i16> @loadnxv8i16_offset(ptr %p) {
+; CHECK-LABEL: loadnxv8i16_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldurh w8, [x0, #1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load i16, ptr %g
+  %v = insertelement <vscale x 8 x i16> poison, i16 %l, i32 0
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 2 x i32> @loadnxv2i32_offset(ptr %p) {
+; CHECK-LABEL: loadnxv2i32_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur w8, [x0, #1]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load i32, ptr %g
+  %v = insertelement <vscale x 2 x i32> poison, i32 %l, i32 0
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 4 x i32> @loadnxv4i32_offset(ptr %p) {
+; CHECK-LABEL: loadnxv4i32_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur w8, [x0, #1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load i32, ptr %g
+  %v = insertelement <vscale x 4 x i32> poison, i32 %l, i32 0
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 2 x i64> @loadnxv2i64_offset(ptr %p) {
+; CHECK-LABEL: loadnxv2i64_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur x8, [x0, #1]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load i64, ptr %g
+  %v = insertelement <vscale x 2 x i64> poison, i64 %l, i32 0
+  ret <vscale x 2 x i64> %v
+}
+
+
+define <vscale x 4 x half> @loadnxv4f16_offset(ptr %p) {
+; CHECK-LABEL: loadnxv4f16_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load half, ptr %g
+  %v = insertelement <vscale x 4 x half> poison, half %l, i32 0
+  ret <vscale x 4 x half> %v
+}
+
+define <vscale x 8 x half> @loadnxv8f16_offset(ptr %p) {
+; CHECK-LABEL: loadnxv8f16_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load half, ptr %g
+  %v = insertelement <vscale x 8 x half> poison, half %l, i32 0
+  ret <vscale x 8 x half> %v
+}
+
+define <vscale x 4 x bfloat> @loadnxv4bf16_offset(ptr %p) {
+; CHECK-LABEL: loadnxv4bf16_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load bfloat, ptr %g
+  %v = insertelement <vscale x 4 x bfloat> poison, bfloat %l, i32 0
+  ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @loadnxv8bf16_offset(ptr %p) {
+; CHECK-LABEL: loadnxv8bf16_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load bfloat, ptr %g
+  %v = insertelement <vscale x 8 x bfloat> poison, bfloat %l, i32 0
+  ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 2 x float> @loadnxv2f32_offset(ptr %p) {
+; CHECK-LABEL: loadnxv2f32_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur s0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load float, ptr %g
+  %v = insertelement <vscale x 2 x float> poison, float %l, i32 0
+  ret <vscale x 2 x float> %v
+}
+
+define <vscale x 4 x float> @loadnxv4f32_offset(ptr %p) {
+; CHECK-LABEL: loadnxv4f32_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur s0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load float, ptr %g
+  %v = insertelement <vscale x 4 x float> poison, float %l, i32 0
+  ret <vscale x 4 x float> %v
+}
+
+define <vscale x 2 x double> @loadnxv2f64_offset(ptr %p) {
+; CHECK-LABEL: loadnxv2f64_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur d0, [x0, #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 1
+  %l = load double, ptr %g
+  %v = insertelement <vscale x 2 x double> poison, double %l, i32 0
+  ret <vscale x 2 x double> %v
+}

diff  --git a/llvm/test/CodeGen/AArch64/load-insert-zero.ll b/llvm/test/CodeGen/AArch64/load-insert-zero.ll
index 23d545459295fc..ccbd6f03fbcc36 100644
--- a/llvm/test/CodeGen/AArch64/load-insert-zero.ll
+++ b/llvm/test/CodeGen/AArch64/load-insert-zero.ll
@@ -378,7 +378,6 @@ define <2 x i64> @loadv2i64_noffset(ptr %p) {
   ret <2 x i64> %v
 }
 
-
 define <4 x half> @loadv4f16_noffset(ptr %p) {
 ; CHECK-LABEL: loadv4f16_noffset:
 ; CHECK:       // %bb.0:
@@ -457,6 +456,328 @@ define <2 x double> @loadv2f64_noffset(ptr %p) {
 }
 
 
+; roW addressing modes
+
+define <8 x i8> @loadv8i8_roW(ptr %p, i32 %o) { ; i8 loaded from %p indexed by the i32 %o, inserted into lane 0 of an all-zero <8 x i8>
+; CHECK-LABEL: loadv8i8_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr b0, [x0, w1, sxtw]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i32 %o ; i32 index: expected to show up as the w1, sxtw register offset of the load
+  %l = load i8, ptr %g
+  %v = insertelement <8 x i8> zeroinitializer, i8 %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into b0
+  ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8_roW(ptr %p, i32 %o) { ; i8 loaded from %p indexed by the i32 %o, inserted into lane 0 of an all-zero <16 x i8>
+; CHECK-LABEL: loadv16i8_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr b0, [x0, w1, sxtw]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i32 %o ; i32 index: expected to show up as the w1, sxtw register offset of the load
+  %l = load i8, ptr %g
+  %v = insertelement <16 x i8> zeroinitializer, i8 %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into b0
+  ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16_roW(ptr %p, i32 %o) { ; i16 loaded from %p indexed by the i32 %o, inserted into lane 0 of an all-zero <4 x i16>
+; CHECK-LABEL: loadv4i16_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i16, ptr %p, i32 %o ; i32 index over i16 elements: expected to show up as w1, sxtw #1 in the load address
+  %l = load i16, ptr %g
+  %v = insertelement <4 x i16> zeroinitializer, i16 %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into h0
+  ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16_roW(ptr %p, i32 %o) { ; i16 loaded from %p indexed by the i32 %o, inserted into lane 0 of an all-zero <8 x i16>
+; CHECK-LABEL: loadv8i16_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i16, ptr %p, i32 %o ; i32 index over i16 elements: expected to show up as w1, sxtw #1 in the load address
+  %l = load i16, ptr %g
+  %v = insertelement <8 x i16> zeroinitializer, i16 %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into h0
+  ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32_roW(ptr %p, i32 %o) { ; i32 loaded from %p indexed by the i32 %o, inserted into lane 0 of an all-zero <2 x i32>
+; CHECK-LABEL: loadv2i32_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, w1, sxtw #2
+; CHECK-NEXT:    ldr s0, [x8]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i32, ptr %p, i32 %o ; NOTE(review): the expected code forms this address with a separate add (sxtw #2) instead of folding it into the ldr as the other element sizes in this group do - presumably the current baseline; confirm
+  %l = load i32, ptr %g
+  %v = insertelement <2 x i32> zeroinitializer, i32 %l, i32 0 ; all other lanes are zero; the load itself is a plain ldr into s0
+  ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32_roW(ptr %p, i32 %o) { ; i32 loaded from %p indexed by the i32 %o, inserted into lane 0 of an all-zero <4 x i32>
+; CHECK-LABEL: loadv4i32_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, w1, sxtw #2
+; CHECK-NEXT:    ldr s0, [x8]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i32, ptr %p, i32 %o ; NOTE(review): the expected code forms this address with a separate add (sxtw #2) instead of folding it into the ldr as the other element sizes in this group do - presumably the current baseline; confirm
+  %l = load i32, ptr %g
+  %v = insertelement <4 x i32> zeroinitializer, i32 %l, i32 0 ; all other lanes are zero; the load itself is a plain ldr into s0
+  ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64_roW(ptr %p, i32 %o) { ; i64 loaded from %p indexed by the i32 %o, inserted into lane 0 of an all-zero <2 x i64>
+; CHECK-LABEL: loadv2i64_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, w1, sxtw #3]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i64, ptr %p, i32 %o ; i32 index over i64 elements: expected to show up as w1, sxtw #3 in the load address
+  %l = load i64, ptr %g
+  %v = insertelement <2 x i64> zeroinitializer, i64 %l, i32 0 ; the other lane is zero; the expected code is only the ldr into d0
+  ret <2 x i64> %v
+}
+
+define <4 x half> @loadv4f16_roW(ptr %p, i32 %o) { ; half loaded from %p indexed by the i32 %o, inserted into lane 0 of an all-zero <4 x half>
+; CHECK-LABEL: loadv4f16_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds half, ptr %p, i32 %o ; i32 index over half elements: expected to show up as w1, sxtw #1 in the load address
+  %l = load half, ptr %g
+  %v = insertelement <4 x half> zeroinitializer, half %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into h0
+  ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16_roW(ptr %p, i32 %o) { ; half loaded from %p indexed by the i32 %o, inserted into lane 0 of an all-zero <8 x half>
+; CHECK-LABEL: loadv8f16_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds half, ptr %p, i32 %o ; i32 index over half elements: expected to show up as w1, sxtw #1 in the load address
+  %l = load half, ptr %g
+  %v = insertelement <8 x half> zeroinitializer, half %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into h0
+  ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16_roW(ptr %p, i32 %o) { ; bfloat loaded from %p indexed by the i32 %o, inserted into lane 0 of an all-zero <4 x bfloat>
+; CHECK-LABEL: loadv4bf16_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds bfloat, ptr %p, i32 %o ; i32 index over bfloat elements: expected to show up as w1, sxtw #1 in the load address
+  %l = load bfloat, ptr %g
+  %v = insertelement <4 x bfloat> zeroinitializer, bfloat %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into h0
+  ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16_roW(ptr %p, i32 %o) { ; bfloat loaded from %p indexed by the i32 %o, inserted into lane 0 of an all-zero <8 x bfloat>
+; CHECK-LABEL: loadv8bf16_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds bfloat, ptr %p, i32 %o ; i32 index over bfloat elements: expected to show up as w1, sxtw #1 in the load address
+  %l = load bfloat, ptr %g
+  %v = insertelement <8 x bfloat> zeroinitializer, bfloat %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into h0
+  ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32_roW(ptr %p, i32 %o) { ; float loaded from %p indexed by the i32 %o, inserted into lane 0 of an all-zero <2 x float>
+; CHECK-LABEL: loadv2f32_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, w1, sxtw #2
+; CHECK-NEXT:    ldr s0, [x8]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds float, ptr %p, i32 %o ; NOTE(review): the expected code forms this address with a separate add (sxtw #2) instead of folding it into the ldr as the other element sizes in this group do - presumably the current baseline; confirm
+  %l = load float, ptr %g
+  %v = insertelement <2 x float> zeroinitializer, float %l, i32 0 ; the other lane is zero; the load itself is a plain ldr into s0
+  ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32_roW(ptr %p, i32 %o) { ; float loaded from %p indexed by the i32 %o, inserted into lane 0 of an all-zero <4 x float>
+; CHECK-LABEL: loadv4f32_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, w1, sxtw #2
+; CHECK-NEXT:    ldr s0, [x8]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds float, ptr %p, i32 %o ; NOTE(review): the expected code forms this address with a separate add (sxtw #2) instead of folding it into the ldr as the other element sizes in this group do - presumably the current baseline; confirm
+  %l = load float, ptr %g
+  %v = insertelement <4 x float> zeroinitializer, float %l, i32 0 ; all other lanes are zero; the load itself is a plain ldr into s0
+  ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64_roW(ptr %p, i32 %o) { ; double loaded from %p indexed by the i32 %o, inserted into lane 0 of an all-zero <2 x double>
+; CHECK-LABEL: loadv2f64_roW:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, w1, sxtw #3]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds double, ptr %p, i32 %o ; i32 index over double elements: expected to show up as w1, sxtw #3 in the load address
+  %l = load double, ptr %g
+  %v = insertelement <2 x double> zeroinitializer, double %l, i32 0 ; the other lane is zero; the expected code is only the ldr into d0
+  ret <2 x double> %v
+}
+
+
+; roX addressing modes
+
+define <8 x i8> @loadv8i8_roX(ptr %p, i64 %o) { ; i8 loaded from %p indexed by the i64 %o, inserted into lane 0 of an all-zero <8 x i8>
+; CHECK-LABEL: loadv8i8_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr b0, [x0, x1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 %o ; i64 index: expected to show up as the plain x1 register offset of the load
+  %l = load i8, ptr %g
+  %v = insertelement <8 x i8> zeroinitializer, i8 %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into b0
+  ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8_roX(ptr %p, i64 %o) { ; i8 loaded from %p indexed by the i64 %o, inserted into lane 0 of an all-zero <16 x i8>
+; CHECK-LABEL: loadv16i8_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr b0, [x0, x1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 %o ; i64 index: expected to show up as the plain x1 register offset of the load
+  %l = load i8, ptr %g
+  %v = insertelement <16 x i8> zeroinitializer, i8 %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into b0
+  ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16_roX(ptr %p, i64 %o) { ; i16 loaded from %p indexed by the i64 %o, inserted into lane 0 of an all-zero <4 x i16>
+; CHECK-LABEL: loadv4i16_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i16, ptr %p, i64 %o ; i64 index over i16 elements: expected to show up as x1, lsl #1 in the load address
+  %l = load i16, ptr %g
+  %v = insertelement <4 x i16> zeroinitializer, i16 %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into h0
+  ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16_roX(ptr %p, i64 %o) { ; i16 loaded from %p indexed by the i64 %o, inserted into lane 0 of an all-zero <8 x i16>
+; CHECK-LABEL: loadv8i16_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i16, ptr %p, i64 %o ; i64 index over i16 elements: expected to show up as x1, lsl #1 in the load address
+  %l = load i16, ptr %g
+  %v = insertelement <8 x i16> zeroinitializer, i16 %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into h0
+  ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32_roX(ptr %p, i64 %o) { ; i32 loaded from %p indexed by the i64 %o, inserted into lane 0 of an all-zero <2 x i32>
+; CHECK-LABEL: loadv2i32_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x1, lsl #2
+; CHECK-NEXT:    ldr s0, [x8]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i32, ptr %p, i64 %o ; NOTE(review): the expected code forms this address with a separate add (lsl #2) instead of folding it into the ldr as the other element sizes in this group do - presumably the current baseline; confirm
+  %l = load i32, ptr %g
+  %v = insertelement <2 x i32> zeroinitializer, i32 %l, i32 0 ; the other lane is zero; the load itself is a plain ldr into s0
+  ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32_roX(ptr %p, i64 %o) { ; i32 loaded from %p indexed by the i64 %o, inserted into lane 0 of an all-zero <4 x i32>
+; CHECK-LABEL: loadv4i32_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x1, lsl #2
+; CHECK-NEXT:    ldr s0, [x8]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i32, ptr %p, i64 %o ; NOTE(review): the expected code forms this address with a separate add (lsl #2) instead of folding it into the ldr as the other element sizes in this group do - presumably the current baseline; confirm
+  %l = load i32, ptr %g
+  %v = insertelement <4 x i32> zeroinitializer, i32 %l, i32 0 ; all other lanes are zero; the load itself is a plain ldr into s0
+  ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64_roX(ptr %p, i64 %o) { ; i64 loaded from %p indexed by the i64 %o, inserted into lane 0 of an all-zero <2 x i64>
+; CHECK-LABEL: loadv2i64_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i64, ptr %p, i64 %o ; i64 index over i64 elements: expected to show up as x1, lsl #3 in the load address
+  %l = load i64, ptr %g
+  %v = insertelement <2 x i64> zeroinitializer, i64 %l, i32 0 ; the other lane is zero; the expected code is only the ldr into d0
+  ret <2 x i64> %v
+}
+
+define <4 x half> @loadv4f16_roX(ptr %p, i64 %o) { ; half loaded from %p indexed by the i64 %o, inserted into lane 0 of an all-zero <4 x half>
+; CHECK-LABEL: loadv4f16_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds half, ptr %p, i64 %o ; i64 index over half elements: expected to show up as x1, lsl #1 in the load address
+  %l = load half, ptr %g
+  %v = insertelement <4 x half> zeroinitializer, half %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into h0
+  ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16_roX(ptr %p, i64 %o) { ; half loaded from %p indexed by the i64 %o, inserted into lane 0 of an all-zero <8 x half>
+; CHECK-LABEL: loadv8f16_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds half, ptr %p, i64 %o ; i64 index over half elements: expected to show up as x1, lsl #1 in the load address
+  %l = load half, ptr %g
+  %v = insertelement <8 x half> zeroinitializer, half %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into h0
+  ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16_roX(ptr %p, i64 %o) { ; bfloat loaded from %p indexed by the i64 %o, inserted into lane 0 of an all-zero <4 x bfloat>
+; CHECK-LABEL: loadv4bf16_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds bfloat, ptr %p, i64 %o ; i64 index over bfloat elements: expected to show up as x1, lsl #1 in the load address
+  %l = load bfloat, ptr %g
+  %v = insertelement <4 x bfloat> zeroinitializer, bfloat %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into h0
+  ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16_roX(ptr %p, i64 %o) { ; bfloat loaded from %p indexed by the i64 %o, inserted into lane 0 of an all-zero <8 x bfloat>
+; CHECK-LABEL: loadv8bf16_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds bfloat, ptr %p, i64 %o ; i64 index over bfloat elements: expected to show up as x1, lsl #1 in the load address
+  %l = load bfloat, ptr %g
+  %v = insertelement <8 x bfloat> zeroinitializer, bfloat %l, i32 0 ; all other lanes are zero; the expected code is only the ldr into h0
+  ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32_roX(ptr %p, i64 %o) { ; float loaded from %p indexed by the i64 %o, inserted into lane 0 of an all-zero <2 x float>
+; CHECK-LABEL: loadv2f32_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x1, lsl #2
+; CHECK-NEXT:    ldr s0, [x8]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds float, ptr %p, i64 %o ; NOTE(review): the expected code forms this address with a separate add (lsl #2) instead of folding it into the ldr as the other element sizes in this group do - presumably the current baseline; confirm
+  %l = load float, ptr %g
+  %v = insertelement <2 x float> zeroinitializer, float %l, i32 0 ; the other lane is zero; the load itself is a plain ldr into s0
+  ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32_roX(ptr %p, i64 %o) { ; float loaded from %p indexed by the i64 %o, inserted into lane 0 of an all-zero <4 x float>
+; CHECK-LABEL: loadv4f32_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x1, lsl #2
+; CHECK-NEXT:    ldr s0, [x8]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds float, ptr %p, i64 %o ; NOTE(review): the expected code forms this address with a separate add (lsl #2) instead of folding it into the ldr as the other element sizes in this group do - presumably the current baseline; confirm
+  %l = load float, ptr %g
+  %v = insertelement <4 x float> zeroinitializer, float %l, i32 0 ; all other lanes are zero; the load itself is a plain ldr into s0
+  ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64_roX(ptr %p, i64 %o) { ; double loaded from %p indexed by the i64 %o, inserted into lane 0 of an all-zero <2 x double>
+; CHECK-LABEL: loadv2f64_roX:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds double, ptr %p, i64 %o ; i64 index over double elements: expected to show up as x1, lsl #3 in the load address
+  %l = load double, ptr %g
+  %v = insertelement <2 x double> zeroinitializer, double %l, i32 0 ; the other lane is zero; the expected code is only the ldr into d0
+  ret <2 x double> %v
+}
+
+
 define void @predictor_4x4_neon(ptr nocapture noundef writeonly %0, i64 noundef %1, ptr nocapture noundef readonly %2, ptr nocapture noundef readnone %3) {
 ; CHECK-LABEL: predictor_4x4_neon:
 ; CHECK:       // %bb.0:


        


More information about the llvm-commits mailing list