[llvm] 55b6c3e - [AArch64][GlobalISel] Add dup tests where load is not combined into dup. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 8 13:34:30 PDT 2025


Author: David Green
Date: 2025-06-08T21:34:26+01:00
New Revision: 55b6c3ed17c8937a99a1b787164417157ab871b9

URL: https://github.com/llvm/llvm-project/commit/55b6c3ed17c8937a99a1b787164417157ab871b9
DIFF: https://github.com/llvm/llvm-project/commit/55b6c3ed17c8937a99a1b787164417157ab871b9.diff

LOG: [AArch64][GlobalISel] Add dup tests where load is not combined into dup. NFC

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/dup.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index bfc0ef0826f68..bdeab033ce084 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -5,6 +5,12 @@
 ; CHECK-GI:       warning: Instruction selection used fallback path for dup_v2i8
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for duplane0_v2i8
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_v2i8
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_str_v2i8
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_str_v3i8
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_str_v4i8
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_str_v8i8
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_str_v16i8
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_str_v32i8
 
 define <2 x i8> @dup_v2i8(i8 %a) {
 ; CHECK-LABEL: dup_v2i8:
@@ -41,6 +47,21 @@ entry:
   ret <2 x i8> %c
 }
 
+define <2 x i8> @loaddup_str_v2i8(ptr %p) {
+; CHECK-LABEL: loaddup_str_v2i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    strb wzr, [x0]
+; CHECK-NEXT:    dup v0.2s, w8
+; CHECK-NEXT:    ret
+entry:
+  %a = load i8, ptr %p
+  %b = insertelement <2 x i8> poison, i8 %a, i64 0
+  store i8 0, ptr %p
+  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
+  ret <2 x i8> %c
+}
+
 define <3 x i8> @dup_v3i8(i8 %a) {
 ; CHECK-SD-LABEL: dup_v3i8:
 ; CHECK-SD:       // %bb.0: // %entry
@@ -105,6 +126,23 @@ entry:
   ret <3 x i8> %c
 }
 
+define <3 x i8> @loaddup_str_v3i8(ptr %p) {
+; CHECK-LABEL: loaddup_str_v3i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:    ldrb w0, [x0]
+; CHECK-NEXT:    strb wzr, [x8]
+; CHECK-NEXT:    mov w1, w0
+; CHECK-NEXT:    mov w2, w0
+; CHECK-NEXT:    ret
+entry:
+  %a = load i8, ptr %p
+  %b = insertelement <3 x i8> poison, i8 %a, i64 0
+  store i8 0, ptr %p
+  %c = shufflevector <3 x i8> %b, <3 x i8> poison, <3 x i32> zeroinitializer
+  ret <3 x i8> %c
+}
+
 define <4 x i8> @dup_v4i8(i8 %a) {
 ; CHECK-SD-LABEL: dup_v4i8:
 ; CHECK-SD:       // %bb.0: // %entry
@@ -162,6 +200,21 @@ entry:
   ret <4 x i8> %c
 }
 
+define <4 x i8> @loaddup_str_v4i8(ptr %p) {
+; CHECK-LABEL: loaddup_str_v4i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    strb wzr, [x0]
+; CHECK-NEXT:    dup v0.4h, w8
+; CHECK-NEXT:    ret
+entry:
+  %a = load i8, ptr %p
+  %b = insertelement <4 x i8> poison, i8 %a, i64 0
+  store i8 0, ptr %p
+  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
+  ret <4 x i8> %c
+}
+
 define <8 x i8> @dup_v8i8(i8 %a) {
 ; CHECK-LABEL: dup_v8i8:
 ; CHECK:       // %bb.0: // %entry
@@ -196,6 +249,20 @@ entry:
   ret <8 x i8> %c
 }
 
+define <8 x i8> @loaddup_str_v8i8(ptr %p) {
+; CHECK-LABEL: loaddup_str_v8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.8b }, [x0]
+; CHECK-NEXT:    strb wzr, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load i8, ptr %p
+  %b = insertelement <8 x i8> poison, i8 %a, i64 0
+  store i8 0, ptr %p
+  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
+  ret <8 x i8> %c
+}
+
 define <16 x i8> @dup_v16i8(i8 %a) {
 ; CHECK-LABEL: dup_v16i8:
 ; CHECK:       // %bb.0: // %entry
@@ -229,6 +296,20 @@ entry:
   ret <16 x i8> %c
 }
 
+define <16 x i8> @loaddup_str_v16i8(ptr %p) {
+; CHECK-LABEL: loaddup_str_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.16b }, [x0]
+; CHECK-NEXT:    strb wzr, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load i8, ptr %p
+  %b = insertelement <16 x i8> poison, i8 %a, i64 0
+  store i8 0, ptr %p
+  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
+  ret <16 x i8> %c
+}
+
 define <32 x i8> @dup_v32i8(i8 %a) {
 ; CHECK-LABEL: dup_v32i8:
 ; CHECK:       // %bb.0: // %entry
@@ -271,6 +352,21 @@ entry:
   ret <32 x i8> %c
 }
 
+define <32 x i8> @loaddup_str_v32i8(ptr %p) {
+; CHECK-LABEL: loaddup_str_v32i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.16b }, [x0]
+; CHECK-NEXT:    strb wzr, [x0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %a = load i8, ptr %p
+  %b = insertelement <32 x i8> poison, i8 %a, i64 0
+  store i8 0, ptr %p
+  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
+  ret <32 x i8> %c
+}
+
 define <2 x i16> @dup_v2i16(i16 %a) {
 ; CHECK-SD-LABEL: dup_v2i16:
 ; CHECK-SD:       // %bb.0: // %entry
@@ -328,6 +424,30 @@ entry:
   ret <2 x i16> %c
 }
 
+define <2 x i16> @loaddup_str_v2i16(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldrh w8, [x0]
+; CHECK-SD-NEXT:    strh wzr, [x0]
+; CHECK-SD-NEXT:    dup v0.2s, w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i16, ptr %p
+  %b = insertelement <2 x i16> poison, i16 %a, i64 0
+  store i16 0, ptr %p
+  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
+  ret <2 x i16> %c
+}
+
 define <3 x i16> @dup_v3i16(i16 %a) {
 ; CHECK-LABEL: dup_v3i16:
 ; CHECK:       // %bb.0: // %entry
@@ -362,6 +482,27 @@ entry:
   ret <3 x i16> %c
 }
 
+define <3 x i16> @loaddup_str_v3i16(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT:    strh wzr, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i16, ptr %p
+  %b = insertelement <3 x i16> poison, i16 %a, i64 0
+  store i16 0, ptr %p
+  %c = shufflevector <3 x i16> %b, <3 x i16> poison, <3 x i32> zeroinitializer
+  ret <3 x i16> %c
+}
+
 define <4 x i16> @dup_v4i16(i16 %a) {
 ; CHECK-LABEL: dup_v4i16:
 ; CHECK:       // %bb.0: // %entry
@@ -396,6 +537,27 @@ entry:
   ret <4 x i16> %c
 }
 
+define <4 x i16> @loaddup_str_v4i16(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT:    strh wzr, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i16, ptr %p
+  %b = insertelement <4 x i16> poison, i16 %a, i64 0
+  store i16 0, ptr %p
+  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
+  ret <4 x i16> %c
+}
+
 define <8 x i16> @dup_v8i16(i16 %a) {
 ; CHECK-LABEL: dup_v8i16:
 ; CHECK:       // %bb.0: // %entry
@@ -429,6 +591,27 @@ entry:
   ret <8 x i16> %c
 }
 
+define <8 x i16> @loaddup_str_v8i16(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v8i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT:    strh wzr, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v8i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i16, ptr %p
+  %b = insertelement <8 x i16> poison, i16 %a, i64 0
+  store i16 0, ptr %p
+  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
+  ret <8 x i16> %c
+}
+
 define <16 x i16> @dup_v16i16(i16 %a) {
 ; CHECK-LABEL: dup_v16i16:
 ; CHECK:       // %bb.0: // %entry
@@ -471,6 +654,29 @@ entry:
   ret <16 x i16> %c
 }
 
+define <16 x i16> @loaddup_str_v16i16(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v16i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT:    strh wzr, [x0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v16i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h1, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.8h, v1.h[0]
+; CHECK-GI-NEXT:    dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i16, ptr %p
+  %b = insertelement <16 x i16> poison, i16 %a, i64 0
+  store i16 0, ptr %p
+  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
+  ret <16 x i16> %c
+}
+
 define <2 x i32> @dup_v2i32(i32 %a) {
 ; CHECK-LABEL: dup_v2i32:
 ; CHECK:       // %bb.0: // %entry
@@ -505,6 +711,27 @@ entry:
   ret <2 x i32> %c
 }
 
+define <2 x i32> @loaddup_str_v2i32(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.2s }, [x0]
+; CHECK-SD-NEXT:    str wzr, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr s0, [x0]
+; CHECK-GI-NEXT:    str wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.2s, v0.s[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i32, ptr %p
+  %b = insertelement <2 x i32> poison, i32 %a, i64 0
+  store i32 0, ptr %p
+  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
+  ret <2 x i32> %c
+}
+
 define <3 x i32> @dup_v3i32(i32 %a) {
 ; CHECK-LABEL: dup_v3i32:
 ; CHECK:       // %bb.0: // %entry
@@ -538,6 +765,27 @@ entry:
   ret <3 x i32> %c
 }
 
+define <3 x i32> @loaddup_str_v3i32(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.4s }, [x0]
+; CHECK-SD-NEXT:    str wzr, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr s0, [x0]
+; CHECK-GI-NEXT:    str wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i32, ptr %p
+  %b = insertelement <3 x i32> poison, i32 %a, i64 0
+  store i32 0, ptr %p
+  %c = shufflevector <3 x i32> %b, <3 x i32> poison, <3 x i32> zeroinitializer
+  ret <3 x i32> %c
+}
+
 define <4 x i32> @dup_v4i32(i32 %a) {
 ; CHECK-LABEL: dup_v4i32:
 ; CHECK:       // %bb.0: // %entry
@@ -571,6 +819,27 @@ entry:
   ret <4 x i32> %c
 }
 
+define <4 x i32> @loaddup_str_v4i32(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.4s }, [x0]
+; CHECK-SD-NEXT:    str wzr, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr s0, [x0]
+; CHECK-GI-NEXT:    str wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i32, ptr %p
+  %b = insertelement <4 x i32> poison, i32 %a, i64 0
+  store i32 0, ptr %p
+  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
+  ret <4 x i32> %c
+}
+
 define <8 x i32> @dup_v8i32(i32 %a) {
 ; CHECK-LABEL: dup_v8i32:
 ; CHECK:       // %bb.0: // %entry
@@ -613,6 +882,29 @@ entry:
   ret <8 x i32> %c
 }
 
+define <8 x i32> @loaddup_str_v8i32(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v8i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.4s }, [x0]
+; CHECK-SD-NEXT:    str wzr, [x0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v8i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr s1, [x0]
+; CHECK-GI-NEXT:    str wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4s, v1.s[0]
+; CHECK-GI-NEXT:    dup v1.4s, v1.s[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i32, ptr %p
+  %b = insertelement <8 x i32> poison, i32 %a, i64 0
+  store i32 0, ptr %p
+  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
+  ret <8 x i32> %c
+}
+
 define <2 x i64> @dup_v2i64(i64 %a) {
 ; CHECK-LABEL: dup_v2i64:
 ; CHECK:       // %bb.0: // %entry
@@ -646,6 +938,27 @@ entry:
   ret <2 x i64> %c
 }
 
+define <2 x i64> @loaddup_str_v2i64(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.2d }, [x0]
+; CHECK-SD-NEXT:    str xzr, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr d0, [x0]
+; CHECK-GI-NEXT:    str xzr, [x0]
+; CHECK-GI-NEXT:    dup v0.2d, v0.d[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i64, ptr %p
+  %b = insertelement <2 x i64> poison, i64 %a, i64 0
+  store i64 0, ptr %p
+  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
+  ret <2 x i64> %c
+}
+
 define <3 x i64> @dup_v3i64(i64 %a) {
 ; CHECK-SD-LABEL: dup_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
@@ -711,6 +1024,33 @@ entry:
   ret <3 x i64> %c
 }
 
+define <3 x i64> @loaddup_str_v3i64(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldr d0, [x0]
+; CHECK-SD-NEXT:    str xzr, [x0]
+; CHECK-SD-NEXT:    fmov d1, d0
+; CHECK-SD-NEXT:    fmov d2, d0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr d2, [x0]
+; CHECK-GI-NEXT:    str xzr, [x0]
+; CHECK-GI-NEXT:    dup v0.2d, v2.d[0]
+; CHECK-GI-NEXT:    dup v2.2d, v2.d[0]
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i64, ptr %p
+  %b = insertelement <3 x i64> poison, i64 %a, i64 0
+  store i64 0, ptr %p
+  %c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer
+  ret <3 x i64> %c
+}
+
 define <4 x i64> @dup_v4i64(i64 %a) {
 ; CHECK-LABEL: dup_v4i64:
 ; CHECK:       // %bb.0: // %entry
@@ -753,6 +1093,29 @@ entry:
   ret <4 x i64> %c
 }
 
+define <4 x i64> @loaddup_str_v4i64(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.2d }, [x0]
+; CHECK-SD-NEXT:    str xzr, [x0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr d1, [x0]
+; CHECK-GI-NEXT:    str xzr, [x0]
+; CHECK-GI-NEXT:    dup v0.2d, v1.d[0]
+; CHECK-GI-NEXT:    dup v1.2d, v1.d[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i64, ptr %p
+  %b = insertelement <4 x i64> poison, i64 %a, i64 0
+  store i64 0, ptr %p
+  %c = shufflevector <4 x i64> %b, <4 x i64> poison, <4 x i32> zeroinitializer
+  ret <4 x i64> %c
+}
+
 define <2 x i128> @dup_v2i128(i128 %a) {
 ; CHECK-LABEL: dup_v2i128:
 ; CHECK:       // %bb.0: // %entry
@@ -800,6 +1163,38 @@ entry:
   ret <2 x i128> %c
 }
 
+define <2 x i128> @loaddup_str_v2i128(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2i128:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    mov x8, x0
+; CHECK-SD-NEXT:    ldr x0, [x0]
+; CHECK-SD-NEXT:    ldr x1, [x8, #8]
+; CHECK-SD-NEXT:    stp xzr, xzr, [x8]
+; CHECK-SD-NEXT:    mov x2, x0
+; CHECK-SD-NEXT:    mov x3, x1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2i128:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr q1, [x0]
+; CHECK-GI-NEXT:    mov v0.d[0], xzr
+; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    mov d2, v1.d[1]
+; CHECK-GI-NEXT:    fmov x0, d1
+; CHECK-GI-NEXT:    fmov x2, d1
+; CHECK-GI-NEXT:    mov v0.d[1], xzr
+; CHECK-GI-NEXT:    fmov x1, d2
+; CHECK-GI-NEXT:    fmov x3, d2
+; CHECK-GI-NEXT:    str q0, [x8]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i128, ptr %p
+  %b = insertelement <2 x i128> poison, i128 %a, i64 0
+  store i128 0, ptr %p
+  %c = shufflevector <2 x i128> %b, <2 x i128> poison, <2 x i32> zeroinitializer
+  ret <2 x i128> %c
+}
+
 define <3 x i128> @dup_v3i128(i128 %a) {
 ; CHECK-LABEL: dup_v3i128:
 ; CHECK:       // %bb.0: // %entry
@@ -855,6 +1250,41 @@ entry:
   ret <3 x i128> %c
 }
 
+define <3 x i128> @loaddup_str_v3i128(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3i128:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    mov x8, x0
+; CHECK-SD-NEXT:    ldp x0, x1, [x0]
+; CHECK-SD-NEXT:    stp xzr, xzr, [x8]
+; CHECK-SD-NEXT:    mov x2, x0
+; CHECK-SD-NEXT:    mov x3, x1
+; CHECK-SD-NEXT:    mov x4, x0
+; CHECK-SD-NEXT:    mov x5, x1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3i128:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr q1, [x0]
+; CHECK-GI-NEXT:    mov v0.d[0], xzr
+; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    mov d2, v1.d[1]
+; CHECK-GI-NEXT:    fmov x0, d1
+; CHECK-GI-NEXT:    fmov x2, d1
+; CHECK-GI-NEXT:    fmov x4, d1
+; CHECK-GI-NEXT:    mov v0.d[1], xzr
+; CHECK-GI-NEXT:    fmov x1, d2
+; CHECK-GI-NEXT:    fmov x3, d2
+; CHECK-GI-NEXT:    fmov x5, d2
+; CHECK-GI-NEXT:    str q0, [x8]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i128, ptr %p
+  %b = insertelement <3 x i128> poison, i128 %a, i64 0
+  store i128 0, ptr %p
+  %c = shufflevector <3 x i128> %b, <3 x i128> poison, <3 x i32> zeroinitializer
+  ret <3 x i128> %c
+}
+
 define <4 x i128> @dup_v4i128(i128 %a) {
 ; CHECK-LABEL: dup_v4i128:
 ; CHECK:       // %bb.0: // %entry
@@ -918,6 +1348,45 @@ entry:
   ret <4 x i128> %c
 }
 
+define <4 x i128> @loaddup_str_v4i128(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4i128:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    mov x8, x0
+; CHECK-SD-NEXT:    ldp x0, x1, [x0]
+; CHECK-SD-NEXT:    stp xzr, xzr, [x8]
+; CHECK-SD-NEXT:    mov x2, x0
+; CHECK-SD-NEXT:    mov x3, x1
+; CHECK-SD-NEXT:    mov x4, x0
+; CHECK-SD-NEXT:    mov x5, x1
+; CHECK-SD-NEXT:    mov x6, x0
+; CHECK-SD-NEXT:    mov x7, x1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4i128:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr q1, [x0]
+; CHECK-GI-NEXT:    mov v0.d[0], xzr
+; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    mov d2, v1.d[1]
+; CHECK-GI-NEXT:    fmov x0, d1
+; CHECK-GI-NEXT:    fmov x2, d1
+; CHECK-GI-NEXT:    fmov x4, d1
+; CHECK-GI-NEXT:    fmov x6, d1
+; CHECK-GI-NEXT:    mov v0.d[1], xzr
+; CHECK-GI-NEXT:    fmov x1, d2
+; CHECK-GI-NEXT:    fmov x3, d2
+; CHECK-GI-NEXT:    fmov x5, d2
+; CHECK-GI-NEXT:    fmov x7, d2
+; CHECK-GI-NEXT:    str q0, [x8]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i128, ptr %p
+  %b = insertelement <4 x i128> poison, i128 %a, i64 0
+  store i128 0, ptr %p
+  %c = shufflevector <4 x i128> %b, <4 x i128> poison, <4 x i32> zeroinitializer
+  ret <4 x i128> %c
+}
+
 define <2 x half> @dup_v2half(half %a) {
 ; CHECK-LABEL: dup_v2half:
 ; CHECK:       // %bb.0: // %entry
@@ -953,6 +1422,28 @@ entry:
   ret <2 x half> %c
 }
 
+define <2 x half> @loaddup_str_v2half(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2half:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
+; CHECK-SD-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT:    str h1, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2half:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load half, ptr %p
+  %b = insertelement <2 x half> poison, half %a, i64 0
+  store half 0.0, ptr %p
+  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer
+  ret <2 x half> %c
+}
+
 define <3 x half> @dup_v3half(half %a) {
 ; CHECK-LABEL: dup_v3half:
 ; CHECK:       // %bb.0: // %entry
@@ -988,6 +1479,28 @@ entry:
   ret <3 x half> %c
 }
 
+define <3 x half> @loaddup_str_v3half(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3half:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
+; CHECK-SD-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT:    str h1, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3half:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load half, ptr %p
+  %b = insertelement <3 x half> poison, half %a, i64 0
+  store half 0.0, ptr %p
+  %c = shufflevector <3 x half> %b, <3 x half> poison, <3 x i32> zeroinitializer
+  ret <3 x half> %c
+}
+
 define <4 x half> @dup_v4half(half %a) {
 ; CHECK-LABEL: dup_v4half:
 ; CHECK:       // %bb.0: // %entry
@@ -1023,6 +1536,28 @@ entry:
   ret <4 x half> %c
 }
 
+define <4 x half> @loaddup_str_v4half(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4half:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
+; CHECK-SD-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT:    str h1, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4half:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load half, ptr %p
+  %b = insertelement <4 x half> poison, half %a, i64 0
+  store half 0.0, ptr %p
+  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer
+  ret <4 x half> %c
+}
+
 define <8 x half> @dup_v8half(half %a) {
 ; CHECK-LABEL: dup_v8half:
 ; CHECK:       // %bb.0: // %entry
@@ -1057,6 +1592,28 @@ entry:
   ret <8 x half> %c
 }
 
+define <8 x half> @loaddup_str_v8half(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v8half:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
+; CHECK-SD-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT:    str h1, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v8half:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load half, ptr %p
+  %b = insertelement <8 x half> poison, half %a, i64 0
+  store half 0.0, ptr %p
+  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
+  ret <8 x half> %c
+}
+
 define <16 x half> @dup_v16half(half %a) {
 ; CHECK-SD-LABEL: dup_v16half:
 ; CHECK-SD:       // %bb.0: // %entry
@@ -1108,6 +1665,30 @@ entry:
   ret <16 x half> %c
 }
 
+define <16 x half> @loaddup_str_v16half(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v16half:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT:    movi d2, #0000000000000000
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    str h2, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v16half:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h1, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.8h, v1.h[0]
+; CHECK-GI-NEXT:    dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load half, ptr %p
+  %b = insertelement <16 x half> poison, half %a, i64 0
+  store half 0.0, ptr %p
+  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer
+  ret <16 x half> %c
+}
+
 define <2 x bfloat> @dup_v2bfloat(bfloat %a) {
 ; CHECK-LABEL: dup_v2bfloat:
 ; CHECK:       // %bb.0: // %entry
@@ -1143,6 +1724,28 @@ entry:
   ret <2 x bfloat> %c
 }
 
+define <2 x bfloat> @loaddup_str_v2bfloat(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2bfloat:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
+; CHECK-SD-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT:    str h1, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2bfloat:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load bfloat, ptr %p
+  %b = insertelement <2 x bfloat> poison, bfloat %a, i64 0
+  store bfloat 0.0, ptr %p
+  %c = shufflevector <2 x bfloat> %b, <2 x bfloat> poison, <2 x i32> zeroinitializer
+  ret <2 x bfloat> %c
+}
+
 define <3 x bfloat> @dup_v3bfloat(bfloat %a) {
 ; CHECK-LABEL: dup_v3bfloat:
 ; CHECK:       // %bb.0: // %entry
@@ -1178,6 +1781,28 @@ entry:
   ret <3 x bfloat> %c
 }
 
+define <3 x bfloat> @loaddup_str_v3bfloat(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3bfloat:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
+; CHECK-SD-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT:    str h1, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3bfloat:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load bfloat, ptr %p
+  %b = insertelement <3 x bfloat> poison, bfloat %a, i64 0
+  store bfloat 0.0, ptr %p
+  %c = shufflevector <3 x bfloat> %b, <3 x bfloat> poison, <3 x i32> zeroinitializer
+  ret <3 x bfloat> %c
+}
+
 define <4 x bfloat> @dup_v4bfloat(bfloat %a) {
 ; CHECK-LABEL: dup_v4bfloat:
 ; CHECK:       // %bb.0: // %entry
@@ -1213,6 +1838,28 @@ entry:
   ret <4 x bfloat> %c
 }
 
+define <4 x bfloat> @loaddup_str_v4bfloat(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4bfloat:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
+; CHECK-SD-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT:    str h1, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4bfloat:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load bfloat, ptr %p
+  %b = insertelement <4 x bfloat> poison, bfloat %a, i64 0
+  store bfloat 0.0, ptr %p
+  %c = shufflevector <4 x bfloat> %b, <4 x bfloat> poison, <4 x i32> zeroinitializer
+  ret <4 x bfloat> %c
+}
+
 define <8 x bfloat> @dup_v8bfloat(bfloat %a) {
 ; CHECK-LABEL: dup_v8bfloat:
 ; CHECK:       // %bb.0: // %entry
@@ -1247,6 +1894,28 @@ entry:
   ret <8 x bfloat> %c
 }
 
+define <8 x bfloat> @loaddup_str_v8bfloat(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v8bfloat:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
+; CHECK-SD-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT:    str h1, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v8bfloat:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load bfloat, ptr %p
+  %b = insertelement <8 x bfloat> poison, bfloat %a, i64 0
+  store bfloat 0.0, ptr %p
+  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
+  ret <8 x bfloat> %c
+}
+
 define <16 x bfloat> @dup_v16bfloat(bfloat %a) {
 ; CHECK-SD-LABEL: dup_v16bfloat:
 ; CHECK-SD:       // %bb.0: // %entry
@@ -1298,6 +1967,30 @@ entry:
   ret <16 x bfloat> %c
 }
 
+define <16 x bfloat> @loaddup_str_v16bfloat(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v16bfloat:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT:    movi d2, #0000000000000000
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    str h2, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v16bfloat:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr h1, [x0]
+; CHECK-GI-NEXT:    strh wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.8h, v1.h[0]
+; CHECK-GI-NEXT:    dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load bfloat, ptr %p
+  %b = insertelement <16 x bfloat> poison, bfloat %a, i64 0
+  store bfloat 0.0, ptr %p
+  %c = shufflevector <16 x bfloat> %b, <16 x bfloat> poison, <16 x i32> zeroinitializer
+  ret <16 x bfloat> %c
+}
+
 define <2 x float> @dup_v2float(float %a) {
 ; CHECK-LABEL: dup_v2float:
 ; CHECK:       // %bb.0: // %entry
@@ -1333,6 +2026,27 @@ entry:
   ret <2 x float> %c
 }
 
+define <2 x float> @loaddup_str_v2float(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2float:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.2s }, [x0]
+; CHECK-SD-NEXT:    str wzr, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2float:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr s0, [x0]
+; CHECK-GI-NEXT:    str wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.2s, v0.s[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load float, ptr %p
+  %b = insertelement <2 x float> poison, float %a, i64 0
+  store float 0.0, ptr %p
+  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer
+  ret <2 x float> %c
+}
+
 define <3 x float> @dup_v3float(float %a) {
 ; CHECK-LABEL: dup_v3float:
 ; CHECK:       // %bb.0: // %entry
@@ -1367,6 +2081,27 @@ entry:
   ret <3 x float> %c
 }
 
+define <3 x float> @loaddup_str_v3float(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3float:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.4s }, [x0]
+; CHECK-SD-NEXT:    str wzr, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3float:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr s0, [x0]
+; CHECK-GI-NEXT:    str wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load float, ptr %p
+  %b = insertelement <3 x float> poison, float %a, i64 0
+  store float 0.0, ptr %p
+  %c = shufflevector <3 x float> %b, <3 x float> poison, <3 x i32> zeroinitializer
+  ret <3 x float> %c
+}
+
 define <4 x float> @dup_v4float(float %a) {
 ; CHECK-LABEL: dup_v4float:
 ; CHECK:       // %bb.0: // %entry
@@ -1401,6 +2136,27 @@ entry:
   ret <4 x float> %c
 }
 
+define <4 x float> @loaddup_str_v4float(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4float:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.4s }, [x0]
+; CHECK-SD-NEXT:    str wzr, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4float:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr s0, [x0]
+; CHECK-GI-NEXT:    str wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load float, ptr %p
+  %b = insertelement <4 x float> poison, float %a, i64 0
+  store float 0.0, ptr %p
+  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
+  ret <4 x float> %c
+}
+
 define <8 x float> @dup_v8float(float %a) {
 ; CHECK-SD-LABEL: dup_v8float:
 ; CHECK-SD:       // %bb.0: // %entry
@@ -1452,6 +2208,29 @@ entry:
   ret <8 x float> %c
 }
 
+define <8 x float> @loaddup_str_v8float(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v8float:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.4s }, [x0]
+; CHECK-SD-NEXT:    str wzr, [x0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v8float:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr s1, [x0]
+; CHECK-GI-NEXT:    str wzr, [x0]
+; CHECK-GI-NEXT:    dup v0.4s, v1.s[0]
+; CHECK-GI-NEXT:    dup v1.4s, v1.s[0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load float, ptr %p
+  %b = insertelement <8 x float> poison, float %a, i64 0
+  store float 0.0, ptr %p
+  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer
+  ret <8 x float> %c
+}
+
 define <2 x double> @dup_v2double(double %a) {
 ; CHECK-LABEL: dup_v2double:
 ; CHECK:       // %bb.0: // %entry
@@ -1486,6 +2265,27 @@ entry:
   ret <2 x double> %c
 }
 
+define <2 x double> @loaddup_str_v2double(ptr %p) { ; splat of a double loaded from %p, with a store to %p between the load and the splat
+; CHECK-SD-LABEL: loaddup_str_v2double:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.2d }, [x0]
+; CHECK-SD-NEXT:    str xzr, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2double:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr d0, [x0]
+; CHECK-GI-NEXT:    str xzr, [x0]
+; CHECK-GI-NEXT:    dup v0.2d, v0.d[0]
+; CHECK-GI-NEXT:    ret
+entry: ; the SD prefix expects a single ld1r; the GI prefix expects a scalar ldr followed by a separate dup
+  %a = load double, ptr %p ; scalar value that gets broadcast
+  %b = insertelement <2 x double> poison, double %a, i64 0 ; place it in lane 0 of an otherwise-poison vector
+  store double 0.0, ptr %p ; overwrites the location that was just loaded, before the splat is formed
+  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer ; all-zero mask: both result lanes take lane 0 of %b
+  ret <2 x double> %c
+}
+
 define <3 x double> @dup_v3double(double %a) {
 ; CHECK-SD-LABEL: dup_v3double:
 ; CHECK-SD:       // %bb.0: // %entry
@@ -1553,6 +2353,33 @@ entry:
   ret <3 x double> %c
 }
 
+define <3 x double> @loaddup_str_v3double(ptr %p) { ; splat of a double loaded from %p into 3 lanes, with a store to %p between the load and the splat
+; CHECK-SD-LABEL: loaddup_str_v3double:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldr d0, [x0]
+; CHECK-SD-NEXT:    str xzr, [x0]
+; CHECK-SD-NEXT:    fmov d1, d0
+; CHECK-SD-NEXT:    fmov d2, d0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3double:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr d2, [x0]
+; CHECK-GI-NEXT:    str xzr, [x0]
+; CHECK-GI-NEXT:    dup v0.2d, v2.d[0]
+; CHECK-GI-NEXT:    dup v2.2d, v2.d[0]
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry: ; result is returned in d0, d1, d2: the SD prefix expects a scalar ldr plus fmov copies, the GI prefix expects two dups plus a lane move
+  %a = load double, ptr %p ; scalar value that gets broadcast
+  %b = insertelement <3 x double> poison, double %a, i64 0 ; place it in lane 0 of an otherwise-poison vector
+  store double 0.0, ptr %p ; overwrites the location that was just loaded, before the splat is formed
+  %c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane takes lane 0 of %b
+  ret <3 x double> %c
+}
+
 define <4 x double> @dup_v4double(double %a) {
 ; CHECK-SD-LABEL: dup_v4double:
 ; CHECK-SD:       // %bb.0: // %entry
@@ -1604,6 +2431,29 @@ entry:
   ret <4 x double> %c
 }
 
+define <4 x double> @loaddup_str_v4double(ptr %p) { ; splat of a double loaded from %p into 4 lanes, with a store to %p between the load and the splat
+; CHECK-SD-LABEL: loaddup_str_v4double:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.2d }, [x0]
+; CHECK-SD-NEXT:    str xzr, [x0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4double:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldr d1, [x0]
+; CHECK-GI-NEXT:    str xzr, [x0]
+; CHECK-GI-NEXT:    dup v0.2d, v1.d[0]
+; CHECK-GI-NEXT:    dup v1.2d, v1.d[0]
+; CHECK-GI-NEXT:    ret
+entry: ; result occupies v0 and v1: the SD prefix expects ld1r plus a register copy, the GI prefix expects ldr plus two dups
+  %a = load double, ptr %p ; scalar value that gets broadcast
+  %b = insertelement <4 x double> poison, double %a, i64 0 ; place it in lane 0 of an otherwise-poison vector
+  store double 0.0, ptr %p ; overwrites the location that was just loaded, before the splat is formed
+  %c = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane takes lane 0 of %b
+  ret <4 x double> %c
+}
+
 define <2 x fp128> @dup_v2fp128(fp128 %a) {
 ; CHECK-LABEL: dup_v2fp128:
 ; CHECK:       // %bb.0: // %entry
@@ -1638,6 +2488,23 @@ entry:
   ret <2 x fp128> %c
 }
 
+define <2 x fp128> @loaddup_str_v2fp128(ptr %p) { ; splat of an fp128 loaded from %p, with a store to %p between the load and the splat
+; CHECK-LABEL: loaddup_str_v2fp128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    adrp x8, .LCPI155_0
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI155_0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    str q2, [x0]
+; CHECK-NEXT:    ret
+entry: ; one shared check prefix here: a q-register load, a register copy for the second element, and the stored constant fetched via a .LCPI label
+  %a = load fp128, ptr %p ; scalar value that gets broadcast
+  %b = insertelement <2 x fp128> poison, fp128 %a, i64 0 ; place it in lane 0 of an otherwise-poison vector
+  store fp128 0xL00000000000000000000000000000000, ptr %p ; all-zero fp128 bit pattern written over the location that was just loaded
+  %c = shufflevector <2 x fp128> %b, <2 x fp128> poison, <2 x i32> zeroinitializer ; all-zero mask: both result lanes take lane 0 of %b
+  ret <2 x fp128> %c
+}
+
 define <3 x fp128> @dup_v3fp128(fp128 %a) {
 ; CHECK-LABEL: dup_v3fp128:
 ; CHECK:       // %bb.0: // %entry
@@ -1675,6 +2542,24 @@ entry:
   ret <3 x fp128> %c
 }
 
+define <3 x fp128> @loaddup_str_v3fp128(ptr %p) { ; splat of an fp128 loaded from %p into 3 lanes, with a store to %p between the load and the splat
+; CHECK-LABEL: loaddup_str_v3fp128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    adrp x8, .LCPI159_0
+; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI159_0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    mov v2.16b, v0.16b
+; CHECK-NEXT:    str q3, [x0]
+; CHECK-NEXT:    ret
+entry: ; one shared check prefix here: a q-register load, register copies for the other elements, and the stored constant fetched via a .LCPI label
+  %a = load fp128, ptr %p ; scalar value that gets broadcast
+  %b = insertelement <3 x fp128> poison, fp128 %a, i64 0 ; place it in lane 0 of an otherwise-poison vector
+  store fp128 0xL00000000000000000000000000000000, ptr %p ; all-zero fp128 bit pattern written over the location that was just loaded
+  %c = shufflevector <3 x fp128> %b, <3 x fp128> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane takes lane 0 of %b
+  ret <3 x fp128> %c
+}
+
 define <4 x fp128> @dup_v4fp128(fp128 %a) {
 ; CHECK-LABEL: dup_v4fp128:
 ; CHECK:       // %bb.0: // %entry
@@ -1714,3 +2599,22 @@ entry:
   %c = shufflevector <4 x fp128> %b, <4 x fp128> poison, <4 x i32> zeroinitializer
   ret <4 x fp128> %c
 }
+
+define <4 x fp128> @loaddup_str_v4fp128(ptr %p) { ; splat of an fp128 loaded from %p into 4 lanes, with a store to %p between the load and the splat
+; CHECK-LABEL: loaddup_str_v4fp128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    adrp x8, .LCPI163_0
+; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI163_0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    mov v2.16b, v0.16b
+; CHECK-NEXT:    mov v3.16b, v0.16b
+; CHECK-NEXT:    str q4, [x0]
+; CHECK-NEXT:    ret
+entry: ; one shared check prefix here: a q-register load, register copies for the other elements, and the stored constant fetched via a .LCPI label
+  %a = load fp128, ptr %p ; scalar value that gets broadcast
+  %b = insertelement <4 x fp128> poison, fp128 %a, i64 0 ; place it in lane 0 of an otherwise-poison vector
+  store fp128 0xL00000000000000000000000000000000, ptr %p ; all-zero fp128 bit pattern written over the location that was just loaded
+  %c = shufflevector <4 x fp128> %b, <4 x fp128> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane takes lane 0 of %b
+  ret <4 x fp128> %c
+}


        


More information about the llvm-commits mailing list