[llvm] 1dfa34c - [AArch64] Extended Dup test coverage. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 8 14:36:53 PST 2024


Author: David Green
Date: 2024-12-08T22:36:48Z
New Revision: 1dfa34c8e1f28963f059e05ce89ebf1f76ebbddc

URL: https://github.com/llvm/llvm-project/commit/1dfa34c8e1f28963f059e05ce89ebf1f76ebbddc
DIFF: https://github.com/llvm/llvm-project/commit/1dfa34c8e1f28963f059e05ce89ebf1f76ebbddc.diff

LOG: [AArch64] Extended Dup test coverage. NFC

Added: 
    llvm/test/CodeGen/AArch64/dup.ll

Modified: 
    llvm/test/CodeGen/AArch64/arm64-dup.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll
index a25763e3b15907..4c28ea75922024 100644
--- a/llvm/test/CodeGen/AArch64/arm64-dup.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll
@@ -733,5 +733,5 @@ define <4 x i16> @dup_i16_v4i16_constant() {
 ; CHECK-GI-NEXT:    adrp x8, .LCPI50_0
 ; CHECK-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI50_0]
 ; CHECK-GI-NEXT:    ret
-    ret <4 x i16> <i16 9211, i16 9211, i16 9211, i16 9211>
+  ret <4 x i16> <i16 9211, i16 9211, i16 9211, i16 9211>
 }

diff  --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
new file mode 100644
index 00000000000000..a2ebdd28b16b8f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -0,0 +1,1698 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI:       warning: Instruction selection used fallback path for dup_v2i8
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for duplane0_v2i8
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_v2i8
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for dup_v2i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for duplane0_v2i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_v2i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for dup_v3i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for duplane0_v3i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_v3i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for dup_v4i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for duplane0_v4i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_v4i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for dup_v2fp128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for duplane0_v2fp128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_v2fp128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for dup_v3fp128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for duplane0_v3fp128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_v3fp128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for dup_v4fp128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for duplane0_v4fp128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for loaddup_v4fp128
+
+define <2 x i8> @dup_v2i8(i8 %a) {
+; CHECK-LABEL: dup_v2i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.2s, w0
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <2 x i8> poison, i8 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i8> %c
+}
+
+define <2 x i8> @duplane0_v2i8(<2 x i8> %b) {
+; CHECK-LABEL: duplane0_v2i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.2s, v0.s[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i8> %c
+}
+
+define <2 x i8> @loaddup_v2i8(ptr %p) {
+; CHECK-LABEL: loaddup_v2i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    dup v0.2s, w8
+; CHECK-NEXT:    ret
+entry:
+  %a = load i8, ptr %p ; scalar to be splatted
+  %b = insertelement <2 x i8> poison, i8 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i8> %c
+}
+
+define <3 x i8> @dup_v3i8(i8 %a) {
+; CHECK-SD-LABEL: dup_v3i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    mov w1, w0
+; CHECK-SD-NEXT:    mov w2, w0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: dup_v3i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    dup v0.8b, w0
+; CHECK-GI-NEXT:    umov w0, v0.b[0]
+; CHECK-GI-NEXT:    umov w1, v0.b[1]
+; CHECK-GI-NEXT:    umov w2, v0.b[2]
+; CHECK-GI-NEXT:    ret
+entry:
+  %b = insertelement <3 x i8> poison, i8 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <3 x i8> %b, <3 x i8> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i8> %c
+}
+
+define <3 x i8> @duplane0_v3i8(<3 x i8> %b) {
+; CHECK-SD-LABEL: duplane0_v3i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    mov w1, w0
+; CHECK-SD-NEXT:    mov w2, w0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: duplane0_v3i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov s0, w0
+; CHECK-GI-NEXT:    mov v0.b[1], w1
+; CHECK-GI-NEXT:    mov v0.b[2], w2
+; CHECK-GI-NEXT:    dup v0.8b, v0.b[0]
+; CHECK-GI-NEXT:    umov w0, v0.b[0]
+; CHECK-GI-NEXT:    umov w1, v0.b[1]
+; CHECK-GI-NEXT:    umov w2, v0.b[2]
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = shufflevector <3 x i8> %b, <3 x i8> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i8> %c
+}
+
+define <3 x i8> @loaddup_v3i8(ptr %p) {
+; CHECK-SD-LABEL: loaddup_v3i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldrb w0, [x0]
+; CHECK-SD-NEXT:    mov w1, w0
+; CHECK-SD-NEXT:    mov w2, w0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_v3i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ld1r { v0.8b }, [x0]
+; CHECK-GI-NEXT:    umov w0, v0.b[0]
+; CHECK-GI-NEXT:    umov w1, v0.b[1]
+; CHECK-GI-NEXT:    umov w2, v0.b[2]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i8, ptr %p ; scalar to be splatted
+  %b = insertelement <3 x i8> poison, i8 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <3 x i8> %b, <3 x i8> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i8> %c
+}
+
+define <4 x i8> @dup_v4i8(i8 %a) {
+; CHECK-SD-LABEL: dup_v4i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    dup v0.4h, w0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: dup_v4i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    dup v0.8b, w0
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %b = insertelement <4 x i8> poison, i8 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i8> %c
+}
+
+define <4 x i8> @duplane0_v4i8(<4 x i8> %b) {
+; CHECK-SD-LABEL: duplane0_v4i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: duplane0_v4i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    dup v0.8b, v0.b[0]
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i8> %c
+}
+
+define <4 x i8> @loaddup_v4i8(ptr %p) {
+; CHECK-SD-LABEL: loaddup_v4i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldrb w8, [x0]
+; CHECK-SD-NEXT:    dup v0.4h, w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_v4i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ld1r { v0.8b }, [x0]
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i8, ptr %p ; scalar to be splatted
+  %b = insertelement <4 x i8> poison, i8 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i8> %c
+}
+
+define <8 x i8> @dup_v8i8(i8 %a) {
+; CHECK-LABEL: dup_v8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.8b, w0
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <8 x i8> poison, i8 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <8 x i8> %c
+}
+
+define <8 x i8> @duplane0_v8i8(<8 x i8> %b) {
+; CHECK-LABEL: duplane0_v8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.8b, v0.b[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <8 x i8> %c
+}
+
+define <8 x i8> @loaddup_v8i8(ptr %p) {
+; CHECK-LABEL: loaddup_v8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.8b }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load i8, ptr %p ; scalar to be splatted
+  %b = insertelement <8 x i8> poison, i8 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <8 x i8> %c
+}
+
+define <16 x i8> @dup_v16i8(i8 %a) {
+; CHECK-LABEL: dup_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.16b, w0
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <16 x i8> poison, i8 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <16 x i8> %c
+}
+
+define <16 x i8> @duplane0_v16i8(<16 x i8> %b) {
+; CHECK-LABEL: duplane0_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.16b, v0.b[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <16 x i8> %c
+}
+
+define <16 x i8> @loaddup_v16i8(ptr %p) {
+; CHECK-LABEL: loaddup_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.16b }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load i8, ptr %p ; scalar to be splatted
+  %b = insertelement <16 x i8> poison, i8 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <16 x i8> %c
+}
+
+define <32 x i8> @dup_v32i8(i8 %a) {
+; CHECK-LABEL: dup_v32i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.16b, w0
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <32 x i8> poison, i8 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <32 x i8> %c
+}
+
+define <32 x i8> @duplane0_v32i8(<32 x i8> %b) {
+; CHECK-LABEL: duplane0_v32i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.16b, v0.b[0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <32 x i8> %c
+}
+
+define <32 x i8> @loaddup_v32i8(ptr %p) {
+; CHECK-SD-LABEL: loaddup_v32i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.16b }, [x0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_v32i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ld1r { v0.16b }, [x0]
+; CHECK-GI-NEXT:    ld1r { v1.16b }, [x0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i8, ptr %p ; scalar to be splatted
+  %b = insertelement <32 x i8> poison, i8 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <32 x i8> %c
+}
+
+define <2 x i16> @dup_v2i16(i16 %a) {
+; CHECK-SD-LABEL: dup_v2i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    dup v0.2s, w0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: dup_v2i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    dup v0.4h, w0
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %b = insertelement <2 x i16> poison, i16 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i16> %c
+}
+
+define <2 x i16> @duplane0_v2i16(<2 x i16> %b) {
+; CHECK-SD-LABEL: duplane0_v2i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    dup v0.2s, v0.s[0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: duplane0_v2i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i16> %c
+}
+
+define <2 x i16> @loaddup_v2i16(ptr %p) {
+; CHECK-SD-LABEL: loaddup_v2i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldrh w8, [x0]
+; CHECK-SD-NEXT:    dup v0.2s, w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_v2i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i16, ptr %p ; scalar to be splatted
+  %b = insertelement <2 x i16> poison, i16 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i16> %c
+}
+
+define <3 x i16> @dup_v3i16(i16 %a) {
+; CHECK-LABEL: dup_v3i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.4h, w0
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <3 x i16> poison, i16 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <3 x i16> %b, <3 x i16> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i16> %c
+}
+
+define <3 x i16> @duplane0_v3i16(<3 x i16> %b) {
+; CHECK-LABEL: duplane0_v3i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <3 x i16> %b, <3 x i16> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i16> %c
+}
+
+define <3 x i16> @loaddup_v3i16(ptr %p) {
+; CHECK-LABEL: loaddup_v3i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load i16, ptr %p ; scalar to be splatted
+  %b = insertelement <3 x i16> poison, i16 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <3 x i16> %b, <3 x i16> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i16> %c
+}
+
+define <4 x i16> @dup_v4i16(i16 %a) {
+; CHECK-LABEL: dup_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.4h, w0
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <4 x i16> poison, i16 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i16> %c
+}
+
+define <4 x i16> @duplane0_v4i16(<4 x i16> %b) {
+; CHECK-LABEL: duplane0_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i16> %c
+}
+
+define <4 x i16> @loaddup_v4i16(ptr %p) {
+; CHECK-LABEL: loaddup_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load i16, ptr %p ; scalar to be splatted
+  %b = insertelement <4 x i16> poison, i16 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i16> %c
+}
+
+define <8 x i16> @dup_v8i16(i16 %a) {
+; CHECK-LABEL: dup_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.8h, w0
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <8 x i16> poison, i16 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <8 x i16> %c
+}
+
+define <8 x i16> @duplane0_v8i16(<8 x i16> %b) {
+; CHECK-LABEL: duplane0_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <8 x i16> %c
+}
+
+define <8 x i16> @loaddup_v8i16(ptr %p) {
+; CHECK-LABEL: loaddup_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load i16, ptr %p ; scalar to be splatted
+  %b = insertelement <8 x i16> poison, i16 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <8 x i16> %c
+}
+
+define <16 x i16> @dup_v16i16(i16 %a) {
+; CHECK-LABEL: dup_v16i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.8h, w0
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <16 x i16> poison, i16 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <16 x i16> %c
+}
+
+define <16 x i16> @duplane0_v16i16(<16 x i16> %b) {
+; CHECK-LABEL: duplane0_v16i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <16 x i16> %c
+}
+
+define <16 x i16> @loaddup_v16i16(ptr %p) {
+; CHECK-SD-LABEL: loaddup_v16i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_v16i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-GI-NEXT:    ld1r { v1.8h }, [x0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i16, ptr %p ; scalar to be splatted
+  %b = insertelement <16 x i16> poison, i16 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <16 x i16> %c
+}
+
+define <2 x i32> @dup_v2i32(i32 %a) {
+; CHECK-LABEL: dup_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.2s, w0
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <2 x i32> poison, i32 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i32> %c
+}
+
+define <2 x i32> @duplane0_v2i32(<2 x i32> %b) {
+; CHECK-LABEL: duplane0_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.2s, v0.s[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i32> %c
+}
+
+define <2 x i32> @loaddup_v2i32(ptr %p) {
+; CHECK-LABEL: loaddup_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.2s }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load i32, ptr %p ; scalar to be splatted
+  %b = insertelement <2 x i32> poison, i32 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i32> %c
+}
+
+define <3 x i32> @dup_v3i32(i32 %a) {
+; CHECK-LABEL: dup_v3i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.4s, w0
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <3 x i32> poison, i32 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <3 x i32> %b, <3 x i32> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i32> %c
+}
+
+define <3 x i32> @duplane0_v3i32(<3 x i32> %b) {
+; CHECK-LABEL: duplane0_v3i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <3 x i32> %b, <3 x i32> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i32> %c
+}
+
+define <3 x i32> @loaddup_v3i32(ptr %p) {
+; CHECK-LABEL: loaddup_v3i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.4s }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load i32, ptr %p ; scalar to be splatted
+  %b = insertelement <3 x i32> poison, i32 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <3 x i32> %b, <3 x i32> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i32> %c
+}
+
+define <4 x i32> @dup_v4i32(i32 %a) {
+; CHECK-LABEL: dup_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.4s, w0
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <4 x i32> poison, i32 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i32> %c
+}
+
+define <4 x i32> @duplane0_v4i32(<4 x i32> %b) {
+; CHECK-LABEL: duplane0_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i32> %c
+}
+
+define <4 x i32> @loaddup_v4i32(ptr %p) {
+; CHECK-LABEL: loaddup_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.4s }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load i32, ptr %p ; scalar to be splatted
+  %b = insertelement <4 x i32> poison, i32 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i32> %c
+}
+
+define <8 x i32> @dup_v8i32(i32 %a) {
+; CHECK-LABEL: dup_v8i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.4s, w0
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <8 x i32> poison, i32 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <8 x i32> %c
+}
+
+define <8 x i32> @duplane0_v8i32(<8 x i32> %b) {
+; CHECK-LABEL: duplane0_v8i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <8 x i32> %c
+}
+
+define <8 x i32> @loaddup_v8i32(ptr %p) {
+; CHECK-SD-LABEL: loaddup_v8i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.4s }, [x0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_v8i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ld1r { v0.4s }, [x0]
+; CHECK-GI-NEXT:    ld1r { v1.4s }, [x0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i32, ptr %p ; scalar to be splatted
+  %b = insertelement <8 x i32> poison, i32 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <8 x i32> %c
+}
+
+define <2 x i64> @dup_v2i64(i64 %a) {
+; CHECK-LABEL: dup_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.2d, x0
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <2 x i64> poison, i64 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i64> %c
+}
+
+define <2 x i64> @duplane0_v2i64(<2 x i64> %b) {
+; CHECK-LABEL: duplane0_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.2d, v0.d[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i64> %c
+}
+
+define <2 x i64> @loaddup_v2i64(ptr %p) {
+; CHECK-LABEL: loaddup_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.2d }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load i64, ptr %p ; scalar to be splatted
+  %b = insertelement <2 x i64> poison, i64 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i64> %c
+}
+
+define <3 x i64> @dup_v3i64(i64 %a) {
+; CHECK-SD-LABEL: dup_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov d0, x0
+; CHECK-SD-NEXT:    fmov d1, d0
+; CHECK-SD-NEXT:    fmov d2, d0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: dup_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    dup v0.2d, x0
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    fmov d2, d0
+; CHECK-GI-NEXT:    ret
+entry:
+  %b = insertelement <3 x i64> poison, i64 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i64> %c
+}
+
+define <3 x i64> @duplane0_v3i64(<3 x i64> %b) {
+; CHECK-SD-LABEL: duplane0_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov d1, d0
+; CHECK-SD-NEXT:    fmov d2, d0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: duplane0_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov d2, d0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
+; CHECK-GI-NEXT:    dup v0.2d, v2.d[0]
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i64> %c
+}
+
+define <3 x i64> @loaddup_v3i64(ptr %p) {
+; CHECK-SD-LABEL: loaddup_v3i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldr d0, [x0]
+; CHECK-SD-NEXT:    fmov d1, d0
+; CHECK-SD-NEXT:    fmov d2, d0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_v3i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ld1r { v0.2d }, [x0]
+; CHECK-GI-NEXT:    ld1r { v2.2d }, [x0]
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i64, ptr %p ; scalar to be splatted
+  %b = insertelement <3 x i64> poison, i64 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i64> %c
+}
+
+define <4 x i64> @dup_v4i64(i64 %a) {
+; CHECK-LABEL: dup_v4i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.2d, x0
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <4 x i64> poison, i64 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <4 x i64> %b, <4 x i64> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i64> %c
+}
+
+define <4 x i64> @duplane0_v4i64(<4 x i64> %b) {
+; CHECK-LABEL: duplane0_v4i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.2d, v0.d[0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <4 x i64> %b, <4 x i64> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i64> %c
+}
+
+define <4 x i64> @loaddup_v4i64(ptr %p) {
+; CHECK-SD-LABEL: loaddup_v4i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.2d }, [x0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_v4i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ld1r { v0.2d }, [x0]
+; CHECK-GI-NEXT:    ld1r { v1.2d }, [x0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load i64, ptr %p ; scalar to be splatted
+  %b = insertelement <4 x i64> poison, i64 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <4 x i64> %b, <4 x i64> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i64> %c
+}
+
+define <2 x i128> @dup_v2i128(i128 %a) {
+; CHECK-LABEL: dup_v2i128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x3, x1
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <2 x i128> poison, i128 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <2 x i128> %b, <2 x i128> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i128> %c
+}
+
+define <2 x i128> @duplane0_v2i128(<2 x i128> %b) {
+; CHECK-LABEL: duplane0_v2i128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x3, x1
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <2 x i128> %b, <2 x i128> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i128> %c
+}
+
+define <2 x i128> @loaddup_v2i128(ptr %p) {
+; CHECK-LABEL: loaddup_v2i128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldp x2, x1, [x0]
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    mov x3, x1
+; CHECK-NEXT:    ret
+entry:
+  %a = load i128, ptr %p ; scalar to be splatted
+  %b = insertelement <2 x i128> poison, i128 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <2 x i128> %b, <2 x i128> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x i128> %c
+}
+
+define <3 x i128> @dup_v3i128(i128 %a) {
+; CHECK-LABEL: dup_v3i128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x3, x1
+; CHECK-NEXT:    mov x4, x0
+; CHECK-NEXT:    mov x5, x1
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <3 x i128> poison, i128 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <3 x i128> %b, <3 x i128> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i128> %c
+}
+
+define <3 x i128> @duplane0_v3i128(<3 x i128> %b) {
+; CHECK-LABEL: duplane0_v3i128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x3, x1
+; CHECK-NEXT:    mov x4, x0
+; CHECK-NEXT:    mov x5, x1
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <3 x i128> %b, <3 x i128> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i128> %c
+}
+
+define <3 x i128> @loaddup_v3i128(ptr %p) {
+; CHECK-LABEL: loaddup_v3i128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldp x2, x1, [x0]
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    mov x3, x1
+; CHECK-NEXT:    mov x4, x2
+; CHECK-NEXT:    mov x5, x1
+; CHECK-NEXT:    ret
+entry:
+  %a = load i128, ptr %p ; scalar to be splatted
+  %b = insertelement <3 x i128> poison, i128 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <3 x i128> %b, <3 x i128> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x i128> %c
+}
+
+define <4 x i128> @dup_v4i128(i128 %a) {
+; CHECK-LABEL: dup_v4i128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x3, x1
+; CHECK-NEXT:    mov x4, x0
+; CHECK-NEXT:    mov x5, x1
+; CHECK-NEXT:    mov x6, x0
+; CHECK-NEXT:    mov x7, x1
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <4 x i128> poison, i128 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <4 x i128> %b, <4 x i128> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i128> %c
+}
+
+define <4 x i128> @duplane0_v4i128(<4 x i128> %b) {
+; CHECK-LABEL: duplane0_v4i128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x3, x1
+; CHECK-NEXT:    mov x4, x0
+; CHECK-NEXT:    mov x5, x1
+; CHECK-NEXT:    mov x6, x0
+; CHECK-NEXT:    mov x7, x1
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <4 x i128> %b, <4 x i128> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i128> %c
+}
+
+define <4 x i128> @loaddup_v4i128(ptr %p) {
+; CHECK-LABEL: loaddup_v4i128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldp x2, x1, [x0]
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    mov x3, x1
+; CHECK-NEXT:    mov x4, x2
+; CHECK-NEXT:    mov x5, x1
+; CHECK-NEXT:    mov x6, x2
+; CHECK-NEXT:    mov x7, x1
+; CHECK-NEXT:    ret
+entry:
+  %a = load i128, ptr %p ; scalar to be splatted
+  %b = insertelement <4 x i128> poison, i128 %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <4 x i128> %b, <4 x i128> poison, <4 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <4 x i128> %c
+}
+
+define <2 x half> @dup_v2half(half %a) {
+; CHECK-LABEL: dup_v2half:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <2 x half> poison, half %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x half> %c
+}
+
+define <2 x half> @duplane0_v2half(<2 x half> %b) {
+; CHECK-LABEL: duplane0_v2half:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x half> %c
+}
+
+define <2 x half> @loaddup_v2half(ptr %p) {
+; CHECK-LABEL: loaddup_v2half:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load half, ptr %p ; scalar to be splatted
+  %b = insertelement <2 x half> poison, half %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <2 x half> %c
+}
+
+define <3 x half> @dup_v3half(half %a) {
+; CHECK-LABEL: dup_v3half:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <3 x half> poison, half %a, i64 0 ; lane 0 = %a, other lanes poison
+  %c = shufflevector <3 x half> %b, <3 x half> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x half> %c
+}
+
+define <3 x half> @duplane0_v3half(<3 x half> %b) {
+; CHECK-LABEL: duplane0_v3half:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <3 x half> %b, <3 x half> poison, <3 x i32> zeroinitializer ; zero mask: every lane = lane 0 of %b
+  ret <3 x half> %c
+}
+
+define <3 x half> @loaddup_v3half(ptr %p) { ; splat: broadcast the half loaded from %p to all 3 lanes; expected code is a single ld1r
+; CHECK-LABEL: loaddup_v3half:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load half, ptr %p ; scalar value to broadcast
+  %b = insertelement <3 x half> poison, half %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <3 x half> %b, <3 x half> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <3 x half> %c
+}
+
+define <4 x half> @dup_v4half(half %a) { ; splat: broadcast scalar %a to all 4 lanes
+; CHECK-LABEL: dup_v4half:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <4 x half> poison, half %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x half> %c
+}
+
+define <4 x half> @duplane0_v4half(<4 x half> %b) { ; broadcast lane 0 of vector argument %b to all 4 lanes
+; CHECK-LABEL: duplane0_v4half:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x half> %c
+}
+
+define <4 x half> @loaddup_v4half(ptr %p) { ; splat: broadcast the half loaded from %p to all 4 lanes; expected code is a single ld1r
+; CHECK-LABEL: loaddup_v4half:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load half, ptr %p ; scalar value to broadcast
+  %b = insertelement <4 x half> poison, half %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x half> %c
+}
+
+define <8 x half> @dup_v8half(half %a) { ; splat: broadcast scalar %a to all 8 lanes
+; CHECK-LABEL: dup_v8half:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <8 x half> poison, half %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <8 x half> %c
+}
+
+define <8 x half> @duplane0_v8half(<8 x half> %b) { ; broadcast lane 0 of vector argument %b to all 8 lanes
+; CHECK-LABEL: duplane0_v8half:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <8 x half> %c
+}
+
+define <8 x half> @loaddup_v8half(ptr %p) { ; splat: broadcast the half loaded from %p to all 8 lanes; expected code is a single ld1r
+; CHECK-LABEL: loaddup_v8half:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load half, ptr %p ; scalar value to broadcast
+  %b = insertelement <8 x half> poison, half %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <8 x half> %c
+}
+
+define <16 x half> @dup_v16half(half %a) { ; splat: broadcast scalar %a to all 16 lanes; result occupies v0 and v1, and the default and -global-isel runs expect different code (copy vs. second dup)
+; CHECK-SD-LABEL: dup_v16half:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-SD-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: dup_v16half:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-GI-NEXT:    dup v2.8h, v0.h[0]
+; CHECK-GI-NEXT:    dup v1.8h, v0.h[0]
+; CHECK-GI-NEXT:    mov v0.16b, v2.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %b = insertelement <16 x half> poison, half %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <16 x half> %c
+}
+
+define <16 x half> @duplane0_v16half(<16 x half> %b) { ; broadcast lane 0 of vector argument %b to all 16 lanes; expected code is one dup plus a copy into v1
+; CHECK-LABEL: duplane0_v16half:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <16 x half> %c
+}
+
+define <16 x half> @loaddup_v16half(ptr %p) { ; splat: broadcast the half loaded from %p to all 16 lanes; the default run expects ld1r plus a copy, the -global-isel run expects two ld1r
+; CHECK-SD-LABEL: loaddup_v16half:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_v16half:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-GI-NEXT:    ld1r { v1.8h }, [x0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load half, ptr %p ; scalar value to broadcast
+  %b = insertelement <16 x half> poison, half %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <16 x half> %c
+}
+
+define <2 x bfloat> @dup_v2bfloat(bfloat %a) { ; splat: broadcast scalar %a to both lanes; expected code uses the .4h arrangement
+; CHECK-LABEL: dup_v2bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <2 x bfloat> poison, bfloat %a, i64 0 ; %a goes into lane 0; lane 1 is poison
+  %c = shufflevector <2 x bfloat> %b, <2 x bfloat> poison, <2 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <2 x bfloat> %c
+}
+
+define <2 x bfloat> @duplane0_v2bfloat(<2 x bfloat> %b) { ; broadcast lane 0 of vector argument %b to both lanes
+; CHECK-LABEL: duplane0_v2bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <2 x bfloat> %b, <2 x bfloat> poison, <2 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <2 x bfloat> %c
+}
+
+define <2 x bfloat> @loaddup_v2bfloat(ptr %p) { ; splat: broadcast the bfloat loaded from %p to both lanes; expected code is a single ld1r
+; CHECK-LABEL: loaddup_v2bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load bfloat, ptr %p ; scalar value to broadcast
+  %b = insertelement <2 x bfloat> poison, bfloat %a, i64 0 ; %a goes into lane 0; lane 1 is poison
+  %c = shufflevector <2 x bfloat> %b, <2 x bfloat> poison, <2 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <2 x bfloat> %c
+}
+
+define <3 x bfloat> @dup_v3bfloat(bfloat %a) { ; splat: broadcast scalar %a to all 3 lanes; expected code uses the .4h arrangement
+; CHECK-LABEL: dup_v3bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <3 x bfloat> poison, bfloat %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <3 x bfloat> %b, <3 x bfloat> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <3 x bfloat> %c
+}
+
+define <3 x bfloat> @duplane0_v3bfloat(<3 x bfloat> %b) { ; broadcast lane 0 of vector argument %b to all 3 lanes
+; CHECK-LABEL: duplane0_v3bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <3 x bfloat> %b, <3 x bfloat> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <3 x bfloat> %c
+}
+
+define <3 x bfloat> @loaddup_v3bfloat(ptr %p) { ; splat: broadcast the bfloat loaded from %p to all 3 lanes; expected code is a single ld1r
+; CHECK-LABEL: loaddup_v3bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load bfloat, ptr %p ; scalar value to broadcast
+  %b = insertelement <3 x bfloat> poison, bfloat %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <3 x bfloat> %b, <3 x bfloat> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <3 x bfloat> %c
+}
+
+define <4 x bfloat> @dup_v4bfloat(bfloat %a) { ; splat: broadcast scalar %a to all 4 lanes
+; CHECK-LABEL: dup_v4bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <4 x bfloat> poison, bfloat %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <4 x bfloat> %b, <4 x bfloat> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x bfloat> %c
+}
+
+define <4 x bfloat> @duplane0_v4bfloat(<4 x bfloat> %b) { ; broadcast lane 0 of vector argument %b to all 4 lanes
+; CHECK-LABEL: duplane0_v4bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <4 x bfloat> %b, <4 x bfloat> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x bfloat> %c
+}
+
+define <4 x bfloat> @loaddup_v4bfloat(ptr %p) { ; splat: broadcast the bfloat loaded from %p to all 4 lanes; expected code is a single ld1r
+; CHECK-LABEL: loaddup_v4bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.4h }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load bfloat, ptr %p ; scalar value to broadcast
+  %b = insertelement <4 x bfloat> poison, bfloat %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <4 x bfloat> %b, <4 x bfloat> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x bfloat> %c
+}
+
+define <8 x bfloat> @dup_v8bfloat(bfloat %a) { ; splat: broadcast scalar %a to all 8 lanes
+; CHECK-LABEL: dup_v8bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <8 x bfloat> poison, bfloat %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @duplane0_v8bfloat(<8 x bfloat> %b) { ; broadcast lane 0 of vector argument %b to all 8 lanes
+; CHECK-LABEL: duplane0_v8bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @loaddup_v8bfloat(ptr %p) { ; splat: broadcast the bfloat loaded from %p to all 8 lanes; expected code is a single ld1r
+; CHECK-LABEL: loaddup_v8bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load bfloat, ptr %p ; scalar value to broadcast
+  %b = insertelement <8 x bfloat> poison, bfloat %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <8 x bfloat> %c
+}
+
+define <16 x bfloat> @dup_v16bfloat(bfloat %a) { ; splat: broadcast scalar %a to all 16 lanes; result occupies v0 and v1, and the default and -global-isel runs expect different code (copy vs. second dup)
+; CHECK-SD-LABEL: dup_v16bfloat:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-SD-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: dup_v16bfloat:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-GI-NEXT:    dup v2.8h, v0.h[0]
+; CHECK-GI-NEXT:    dup v1.8h, v0.h[0]
+; CHECK-GI-NEXT:    mov v0.16b, v2.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %b = insertelement <16 x bfloat> poison, bfloat %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <16 x bfloat> %b, <16 x bfloat> poison, <16 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @duplane0_v16bfloat(<16 x bfloat> %b) { ; broadcast lane 0 of vector argument %b to all 16 lanes; expected code is one dup plus a copy into v1
+; CHECK-LABEL: duplane0_v16bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <16 x bfloat> %b, <16 x bfloat> poison, <16 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @loaddup_v16bfloat(ptr %p) { ; splat: broadcast the bfloat loaded from %p to all 16 lanes; the default run expects ld1r plus a copy, the -global-isel run expects two ld1r
+; CHECK-SD-LABEL: loaddup_v16bfloat:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_v16bfloat:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ld1r { v0.8h }, [x0]
+; CHECK-GI-NEXT:    ld1r { v1.8h }, [x0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load bfloat, ptr %p ; scalar value to broadcast
+  %b = insertelement <16 x bfloat> poison, bfloat %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <16 x bfloat> %b, <16 x bfloat> poison, <16 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <16 x bfloat> %c
+}
+
+define <2 x float> @dup_v2float(float %a) { ; splat: broadcast scalar %a to both lanes
+; CHECK-LABEL: dup_v2float:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    dup v0.2s, v0.s[0]
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <2 x float> poison, float %a, i64 0 ; %a goes into lane 0; lane 1 is poison
+  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <2 x float> %c
+}
+
+define <2 x float> @duplane0_v2float(<2 x float> %b) { ; broadcast lane 0 of vector argument %b to both lanes
+; CHECK-LABEL: duplane0_v2float:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.2s, v0.s[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <2 x float> %c
+}
+
+define <2 x float> @loaddup_v2float(ptr %p) { ; splat: broadcast the float loaded from %p to both lanes; expected code is a single ld1r
+; CHECK-LABEL: loaddup_v2float:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.2s }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load float, ptr %p ; scalar value to broadcast
+  %b = insertelement <2 x float> poison, float %a, i64 0 ; %a goes into lane 0; lane 1 is poison
+  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <2 x float> %c
+}
+
+define <3 x float> @dup_v3float(float %a) { ; splat: broadcast scalar %a to all 3 lanes; expected code uses the .4s arrangement
+; CHECK-LABEL: dup_v3float:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <3 x float> poison, float %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <3 x float> %b, <3 x float> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <3 x float> %c
+}
+
+define <3 x float> @duplane0_v3float(<3 x float> %b) { ; broadcast lane 0 of vector argument %b to all 3 lanes
+; CHECK-LABEL: duplane0_v3float:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <3 x float> %b, <3 x float> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <3 x float> %c
+}
+
+define <3 x float> @loaddup_v3float(ptr %p) { ; splat: broadcast the float loaded from %p to all 3 lanes; expected code is a single ld1r
+; CHECK-LABEL: loaddup_v3float:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.4s }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load float, ptr %p ; scalar value to broadcast
+  %b = insertelement <3 x float> poison, float %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <3 x float> %b, <3 x float> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <3 x float> %c
+}
+
+define <4 x float> @dup_v4float(float %a) { ; splat: broadcast scalar %a to all 4 lanes
+; CHECK-LABEL: dup_v4float:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <4 x float> poison, float %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x float> %c
+}
+
+define <4 x float> @duplane0_v4float(<4 x float> %b) { ; broadcast lane 0 of vector argument %b to all 4 lanes
+; CHECK-LABEL: duplane0_v4float:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x float> %c
+}
+
+define <4 x float> @loaddup_v4float(ptr %p) { ; splat: broadcast the float loaded from %p to all 4 lanes; expected code is a single ld1r
+; CHECK-LABEL: loaddup_v4float:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.4s }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load float, ptr %p ; scalar value to broadcast
+  %b = insertelement <4 x float> poison, float %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x float> %c
+}
+
+define <8 x float> @dup_v8float(float %a) { ; splat: broadcast scalar %a to all 8 lanes; result occupies v0 and v1, and the default and -global-isel runs expect different code (copy vs. second dup)
+; CHECK-SD-LABEL: dup_v8float:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: dup_v8float:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    dup v2.4s, v0.s[0]
+; CHECK-GI-NEXT:    dup v1.4s, v0.s[0]
+; CHECK-GI-NEXT:    mov v0.16b, v2.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %b = insertelement <8 x float> poison, float %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <8 x float> %c
+}
+
+define <8 x float> @duplane0_v8float(<8 x float> %b) { ; broadcast lane 0 of vector argument %b to all 8 lanes; expected code is one dup plus a copy into v1
+; CHECK-LABEL: duplane0_v8float:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <8 x float> %c
+}
+
+define <8 x float> @loaddup_v8float(ptr %p) { ; splat: broadcast the float loaded from %p to all 8 lanes; the default run expects ld1r plus a copy, the -global-isel run expects two ld1r
+; CHECK-SD-LABEL: loaddup_v8float:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.4s }, [x0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_v8float:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ld1r { v0.4s }, [x0]
+; CHECK-GI-NEXT:    ld1r { v1.4s }, [x0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load float, ptr %p ; scalar value to broadcast
+  %b = insertelement <8 x float> poison, float %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <8 x float> %c
+}
+
+define <2 x double> @dup_v2double(double %a) { ; splat: broadcast scalar %a to both lanes
+; CHECK-LABEL: dup_v2double:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.2d, v0.d[0]
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <2 x double> poison, double %a, i64 0 ; %a goes into lane 0; lane 1 is poison
+  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <2 x double> %c
+}
+
+define <2 x double> @duplane0_v2double(<2 x double> %b) { ; broadcast lane 0 of vector argument %b to both lanes
+; CHECK-LABEL: duplane0_v2double:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.2d, v0.d[0]
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <2 x double> %c
+}
+
+define <2 x double> @loaddup_v2double(ptr %p) { ; splat: broadcast the double loaded from %p to both lanes; expected code is a single ld1r
+; CHECK-LABEL: loaddup_v2double:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1r { v0.2d }, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %a = load double, ptr %p ; scalar value to broadcast
+  %b = insertelement <2 x double> poison, double %a, i64 0 ; %a goes into lane 0; lane 1 is poison
+  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <2 x double> %c
+}
+
+define <3 x double> @dup_v3double(double %a) { ; splat: broadcast scalar %a to all 3 lanes; result occupies d0-d2, and the default run expects two scalar fmov copies while the -global-isel run expects dup-based code
+; CHECK-SD-LABEL: dup_v3double:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov d1, d0
+; CHECK-SD-NEXT:    fmov d2, d0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: dup_v3double:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    dup v3.2d, v0.d[0]
+; CHECK-GI-NEXT:    dup v2.2d, v0.d[0]
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT:    mov d1, v3.d[1]
+; CHECK-GI-NEXT:    fmov d0, d3
+; CHECK-GI-NEXT:    ret
+entry:
+  %b = insertelement <3 x double> poison, double %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <3 x double> %c
+}
+
+define <3 x double> @duplane0_v3double(<3 x double> %b) { ; broadcast lane 0 of vector argument %b to all 3 lanes; the default run expects two scalar fmov copies, the -global-isel run expects a longer dup-based sequence
+; CHECK-SD-LABEL: duplane0_v3double:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov d1, d0
+; CHECK-SD-NEXT:    fmov d2, d0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: duplane0_v3double:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov d2, d0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
+; CHECK-GI-NEXT:    dup v0.2d, v2.d[0]
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <3 x double> %c
+}
+
+define <3 x double> @loaddup_v3double(ptr %p) { ; splat: broadcast the double loaded from %p to all 3 lanes; the default run expects a scalar ldr plus two fmov copies, the -global-isel run expects two ld1r plus a lane move
+; CHECK-SD-LABEL: loaddup_v3double:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldr d0, [x0]
+; CHECK-SD-NEXT:    fmov d1, d0
+; CHECK-SD-NEXT:    fmov d2, d0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_v3double:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ld1r { v0.2d }, [x0]
+; CHECK-GI-NEXT:    ld1r { v2.2d }, [x0]
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load double, ptr %p ; scalar value to broadcast
+  %b = insertelement <3 x double> poison, double %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <3 x double> %c
+}
+
+define <4 x double> @dup_v4double(double %a) { ; splat: broadcast scalar %a to all 4 lanes; result occupies v0 and v1, and the default and -global-isel runs expect different code (copy vs. second dup)
+; CHECK-SD-LABEL: dup_v4double:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    dup v0.2d, v0.d[0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: dup_v4double:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    dup v2.2d, v0.d[0]
+; CHECK-GI-NEXT:    dup v1.2d, v0.d[0]
+; CHECK-GI-NEXT:    mov v0.16b, v2.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %b = insertelement <4 x double> poison, double %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x double> %c
+}
+
+define <4 x double> @duplane0_v4double(<4 x double> %b) { ; broadcast lane 0 of vector argument %b to all 4 lanes; expected code is one dup plus a copy into v1
+; CHECK-LABEL: duplane0_v4double:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.2d, v0.d[0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x double> %c
+}
+
+define <4 x double> @loaddup_v4double(ptr %p) { ; splat: broadcast the double loaded from %p to all 4 lanes; the default run expects ld1r plus a copy, the -global-isel run expects two ld1r
+; CHECK-SD-LABEL: loaddup_v4double:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ld1r { v0.2d }, [x0]
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: loaddup_v4double:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ld1r { v0.2d }, [x0]
+; CHECK-GI-NEXT:    ld1r { v1.2d }, [x0]
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = load double, ptr %p ; scalar value to broadcast
+  %b = insertelement <4 x double> poison, double %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x double> %c
+}
+
+define <2 x fp128> @dup_v2fp128(fp128 %a) { ; splat: broadcast scalar %a to both lanes; expected code is a plain register copy v0 -> v1, no dup instruction
+; CHECK-LABEL: dup_v2fp128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <2 x fp128> poison, fp128 %a, i64 0 ; %a goes into lane 0; lane 1 is poison
+  %c = shufflevector <2 x fp128> %b, <2 x fp128> poison, <2 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <2 x fp128> %c
+}
+
+define <2 x fp128> @duplane0_v2fp128(<2 x fp128> %b) { ; broadcast lane 0 of vector argument %b to both lanes; expected code is a plain register copy v0 -> v1
+; CHECK-LABEL: duplane0_v2fp128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <2 x fp128> %b, <2 x fp128> poison, <2 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <2 x fp128> %c
+}
+
+define <2 x fp128> @loaddup_v2fp128(ptr %p) { ; splat: broadcast the fp128 loaded from %p to both lanes; expected code is one ldr q0 plus a register copy
+; CHECK-LABEL: loaddup_v2fp128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %a = load fp128, ptr %p ; scalar value to broadcast
+  %b = insertelement <2 x fp128> poison, fp128 %a, i64 0 ; %a goes into lane 0; lane 1 is poison
+  %c = shufflevector <2 x fp128> %b, <2 x fp128> poison, <2 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <2 x fp128> %c
+}
+
+define <3 x fp128> @dup_v3fp128(fp128 %a) { ; splat: broadcast scalar %a to all 3 lanes; expected code is register copies from v0 into v1 and v2
+; CHECK-LABEL: dup_v3fp128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    mov v2.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <3 x fp128> poison, fp128 %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <3 x fp128> %b, <3 x fp128> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <3 x fp128> %c
+}
+
+define <3 x fp128> @duplane0_v3fp128(<3 x fp128> %b) { ; broadcast lane 0 of vector argument %b to all 3 lanes; expected code is register copies from v0 into v1 and v2
+; CHECK-LABEL: duplane0_v3fp128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    mov v2.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <3 x fp128> %b, <3 x fp128> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <3 x fp128> %c
+}
+
+define <3 x fp128> @loaddup_v3fp128(ptr %p) { ; splat: broadcast the fp128 loaded from %p to all 3 lanes; expected code is one ldr q0 plus register copies into v1 and v2
+; CHECK-LABEL: loaddup_v3fp128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    mov v2.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %a = load fp128, ptr %p ; scalar value to broadcast
+  %b = insertelement <3 x fp128> poison, fp128 %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <3 x fp128> %b, <3 x fp128> poison, <3 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <3 x fp128> %c
+}
+
+define <4 x fp128> @dup_v4fp128(fp128 %a) { ; splat: broadcast scalar %a to all 4 lanes; expected code is register copies from v0 into v1, v2 and v3
+; CHECK-LABEL: dup_v4fp128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    mov v2.16b, v0.16b
+; CHECK-NEXT:    mov v3.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <4 x fp128> poison, fp128 %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <4 x fp128> %b, <4 x fp128> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x fp128> %c
+}
+
+define <4 x fp128> @duplane0_v4fp128(<4 x fp128> %b) { ; broadcast lane 0 of vector argument %b to all 4 lanes; expected code is register copies from v0 into v1, v2 and v3
+; CHECK-LABEL: duplane0_v4fp128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    mov v2.16b, v0.16b
+; CHECK-NEXT:    mov v3.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %c = shufflevector <4 x fp128> %b, <4 x fp128> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x fp128> %c
+}
+
+define <4 x fp128> @loaddup_v4fp128(ptr %p) { ; splat: broadcast the fp128 loaded from %p to all 4 lanes; expected code is one ldr q0 plus register copies into v1, v2 and v3
+; CHECK-LABEL: loaddup_v4fp128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    mov v2.16b, v0.16b
+; CHECK-NEXT:    mov v3.16b, v0.16b
+; CHECK-NEXT:    ret
+entry:
+  %a = load fp128, ptr %p ; scalar value to broadcast
+  %b = insertelement <4 x fp128> poison, fp128 %a, i64 0 ; %a goes into lane 0; the remaining lanes are poison
+  %c = shufflevector <4 x fp128> %b, <4 x fp128> poison, <4 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0 of %b
+  ret <4 x fp128> %c
+}


        


More information about the llvm-commits mailing list