[llvm] 55b6c3e - [AArch64][GlobalISel] Add dup tests where load is not combined into dup. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 8 13:34:30 PDT 2025
Author: David Green
Date: 2025-06-08T21:34:26+01:00
New Revision: 55b6c3ed17c8937a99a1b787164417157ab871b9
URL: https://github.com/llvm/llvm-project/commit/55b6c3ed17c8937a99a1b787164417157ab871b9
DIFF: https://github.com/llvm/llvm-project/commit/55b6c3ed17c8937a99a1b787164417157ab871b9.diff
LOG: [AArch64][GlobalISel] Add dup tests where load is not combined into dup. NFC
Added:
Modified:
llvm/test/CodeGen/AArch64/dup.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index bfc0ef0826f68..bdeab033ce084 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -5,6 +5,12 @@
; CHECK-GI: warning: Instruction selection used fallback path for dup_v2i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v2i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v2i8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v2i8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v3i8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v4i8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v8i8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v16i8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v32i8
define <2 x i8> @dup_v2i8(i8 %a) {
; CHECK-LABEL: dup_v2i8:
@@ -41,6 +47,21 @@ entry:
ret <2 x i8> %c
}
+define <2 x i8> @loaddup_str_v2i8(ptr %p) {
+; CHECK-LABEL: loaddup_str_v2i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrb w8, [x0]
+; CHECK-NEXT: strb wzr, [x0]
+; CHECK-NEXT: dup v0.2s, w8
+; CHECK-NEXT: ret
+entry:
+ %a = load i8, ptr %p
+ %b = insertelement <2 x i8> poison, i8 %a, i64 0
+ store i8 0, ptr %p
+ %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
+ ret <2 x i8> %c
+}
+
define <3 x i8> @dup_v3i8(i8 %a) {
; CHECK-SD-LABEL: dup_v3i8:
; CHECK-SD: // %bb.0: // %entry
@@ -105,6 +126,23 @@ entry:
ret <3 x i8> %c
}
+define <3 x i8> @loaddup_str_v3i8(ptr %p) {
+; CHECK-LABEL: loaddup_str_v3i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, x0
+; CHECK-NEXT: ldrb w0, [x0]
+; CHECK-NEXT: strb wzr, [x8]
+; CHECK-NEXT: mov w1, w0
+; CHECK-NEXT: mov w2, w0
+; CHECK-NEXT: ret
+entry:
+ %a = load i8, ptr %p
+ %b = insertelement <3 x i8> poison, i8 %a, i64 0
+ store i8 0, ptr %p
+ %c = shufflevector <3 x i8> %b, <3 x i8> poison, <3 x i32> zeroinitializer
+ ret <3 x i8> %c
+}
+
define <4 x i8> @dup_v4i8(i8 %a) {
; CHECK-SD-LABEL: dup_v4i8:
; CHECK-SD: // %bb.0: // %entry
@@ -162,6 +200,21 @@ entry:
ret <4 x i8> %c
}
+define <4 x i8> @loaddup_str_v4i8(ptr %p) {
+; CHECK-LABEL: loaddup_str_v4i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldrb w8, [x0]
+; CHECK-NEXT: strb wzr, [x0]
+; CHECK-NEXT: dup v0.4h, w8
+; CHECK-NEXT: ret
+entry:
+ %a = load i8, ptr %p
+ %b = insertelement <4 x i8> poison, i8 %a, i64 0
+ store i8 0, ptr %p
+ %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
+ ret <4 x i8> %c
+}
+
define <8 x i8> @dup_v8i8(i8 %a) {
; CHECK-LABEL: dup_v8i8:
; CHECK: // %bb.0: // %entry
@@ -196,6 +249,20 @@ entry:
ret <8 x i8> %c
}
+define <8 x i8> @loaddup_str_v8i8(ptr %p) {
+; CHECK-LABEL: loaddup_str_v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ld1r { v0.8b }, [x0]
+; CHECK-NEXT: strb wzr, [x0]
+; CHECK-NEXT: ret
+entry:
+ %a = load i8, ptr %p
+ %b = insertelement <8 x i8> poison, i8 %a, i64 0
+ store i8 0, ptr %p
+ %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
+ ret <8 x i8> %c
+}
+
define <16 x i8> @dup_v16i8(i8 %a) {
; CHECK-LABEL: dup_v16i8:
; CHECK: // %bb.0: // %entry
@@ -229,6 +296,20 @@ entry:
ret <16 x i8> %c
}
+define <16 x i8> @loaddup_str_v16i8(ptr %p) {
+; CHECK-LABEL: loaddup_str_v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ld1r { v0.16b }, [x0]
+; CHECK-NEXT: strb wzr, [x0]
+; CHECK-NEXT: ret
+entry:
+ %a = load i8, ptr %p
+ %b = insertelement <16 x i8> poison, i8 %a, i64 0
+ store i8 0, ptr %p
+ %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
+ ret <16 x i8> %c
+}
+
define <32 x i8> @dup_v32i8(i8 %a) {
; CHECK-LABEL: dup_v32i8:
; CHECK: // %bb.0: // %entry
@@ -271,6 +352,21 @@ entry:
ret <32 x i8> %c
}
+define <32 x i8> @loaddup_str_v32i8(ptr %p) {
+; CHECK-LABEL: loaddup_str_v32i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ld1r { v0.16b }, [x0]
+; CHECK-NEXT: strb wzr, [x0]
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: ret
+entry:
+ %a = load i8, ptr %p
+ %b = insertelement <32 x i8> poison, i8 %a, i64 0
+ store i8 0, ptr %p
+ %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
+ ret <32 x i8> %c
+}
+
define <2 x i16> @dup_v2i16(i16 %a) {
; CHECK-SD-LABEL: dup_v2i16:
; CHECK-SD: // %bb.0: // %entry
@@ -328,6 +424,30 @@ entry:
ret <2 x i16> %c
}
+define <2 x i16> @loaddup_str_v2i16(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldrh w8, [x0]
+; CHECK-SD-NEXT: strh wzr, [x0]
+; CHECK-SD-NEXT: dup v0.2s, w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i16, ptr %p
+ %b = insertelement <2 x i16> poison, i16 %a, i64 0
+ store i16 0, ptr %p
+ %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
+ ret <2 x i16> %c
+}
+
define <3 x i16> @dup_v3i16(i16 %a) {
; CHECK-LABEL: dup_v3i16:
; CHECK: // %bb.0: // %entry
@@ -362,6 +482,27 @@ entry:
ret <3 x i16> %c
}
+define <3 x i16> @loaddup_str_v3i16(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT: strh wzr, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i16, ptr %p
+ %b = insertelement <3 x i16> poison, i16 %a, i64 0
+ store i16 0, ptr %p
+ %c = shufflevector <3 x i16> %b, <3 x i16> poison, <3 x i32> zeroinitializer
+ ret <3 x i16> %c
+}
+
define <4 x i16> @dup_v4i16(i16 %a) {
; CHECK-LABEL: dup_v4i16:
; CHECK: // %bb.0: // %entry
@@ -396,6 +537,27 @@ entry:
ret <4 x i16> %c
}
+define <4 x i16> @loaddup_str_v4i16(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT: strh wzr, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i16, ptr %p
+ %b = insertelement <4 x i16> poison, i16 %a, i64 0
+ store i16 0, ptr %p
+ %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
+ ret <4 x i16> %c
+}
+
define <8 x i16> @dup_v8i16(i16 %a) {
; CHECK-LABEL: dup_v8i16:
; CHECK: // %bb.0: // %entry
@@ -429,6 +591,27 @@ entry:
ret <8 x i16> %c
}
+define <8 x i16> @loaddup_str_v8i16(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v8i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT: strh wzr, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v8i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i16, ptr %p
+ %b = insertelement <8 x i16> poison, i16 %a, i64 0
+ store i16 0, ptr %p
+ %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
+ ret <8 x i16> %c
+}
+
define <16 x i16> @dup_v16i16(i16 %a) {
; CHECK-LABEL: dup_v16i16:
; CHECK: // %bb.0: // %entry
@@ -471,6 +654,29 @@ entry:
ret <16 x i16> %c
}
+define <16 x i16> @loaddup_str_v16i16(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v16i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT: strh wzr, [x0]
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v16i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h1, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.8h, v1.h[0]
+; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i16, ptr %p
+ %b = insertelement <16 x i16> poison, i16 %a, i64 0
+ store i16 0, ptr %p
+ %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
+ ret <16 x i16> %c
+}
+
define <2 x i32> @dup_v2i32(i32 %a) {
; CHECK-LABEL: dup_v2i32:
; CHECK: // %bb.0: // %entry
@@ -505,6 +711,27 @@ entry:
ret <2 x i32> %c
}
+define <2 x i32> @loaddup_str_v2i32(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.2s }, [x0]
+; CHECK-SD-NEXT: str wzr, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: str wzr, [x0]
+; CHECK-GI-NEXT: dup v0.2s, v0.s[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i32, ptr %p
+ %b = insertelement <2 x i32> poison, i32 %a, i64 0
+ store i32 0, ptr %p
+ %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
+ ret <2 x i32> %c
+}
+
define <3 x i32> @dup_v3i32(i32 %a) {
; CHECK-LABEL: dup_v3i32:
; CHECK: // %bb.0: // %entry
@@ -538,6 +765,27 @@ entry:
ret <3 x i32> %c
}
+define <3 x i32> @loaddup_str_v3i32(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.4s }, [x0]
+; CHECK-SD-NEXT: str wzr, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: str wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4s, v0.s[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i32, ptr %p
+ %b = insertelement <3 x i32> poison, i32 %a, i64 0
+ store i32 0, ptr %p
+ %c = shufflevector <3 x i32> %b, <3 x i32> poison, <3 x i32> zeroinitializer
+ ret <3 x i32> %c
+}
+
define <4 x i32> @dup_v4i32(i32 %a) {
; CHECK-LABEL: dup_v4i32:
; CHECK: // %bb.0: // %entry
@@ -571,6 +819,27 @@ entry:
ret <4 x i32> %c
}
+define <4 x i32> @loaddup_str_v4i32(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.4s }, [x0]
+; CHECK-SD-NEXT: str wzr, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: str wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4s, v0.s[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i32, ptr %p
+ %b = insertelement <4 x i32> poison, i32 %a, i64 0
+ store i32 0, ptr %p
+ %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
+ ret <4 x i32> %c
+}
+
define <8 x i32> @dup_v8i32(i32 %a) {
; CHECK-LABEL: dup_v8i32:
; CHECK: // %bb.0: // %entry
@@ -613,6 +882,29 @@ entry:
ret <8 x i32> %c
}
+define <8 x i32> @loaddup_str_v8i32(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v8i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.4s }, [x0]
+; CHECK-SD-NEXT: str wzr, [x0]
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v8i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr s1, [x0]
+; CHECK-GI-NEXT: str wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4s, v1.s[0]
+; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i32, ptr %p
+ %b = insertelement <8 x i32> poison, i32 %a, i64 0
+ store i32 0, ptr %p
+ %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
+ ret <8 x i32> %c
+}
+
define <2 x i64> @dup_v2i64(i64 %a) {
; CHECK-LABEL: dup_v2i64:
; CHECK: // %bb.0: // %entry
@@ -646,6 +938,27 @@ entry:
ret <2 x i64> %c
}
+define <2 x i64> @loaddup_str_v2i64(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.2d }, [x0]
+; CHECK-SD-NEXT: str xzr, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr d0, [x0]
+; CHECK-GI-NEXT: str xzr, [x0]
+; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i64, ptr %p
+ %b = insertelement <2 x i64> poison, i64 %a, i64 0
+ store i64 0, ptr %p
+ %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
+ ret <2 x i64> %c
+}
+
define <3 x i64> @dup_v3i64(i64 %a) {
; CHECK-SD-LABEL: dup_v3i64:
; CHECK-SD: // %bb.0: // %entry
@@ -711,6 +1024,33 @@ entry:
ret <3 x i64> %c
}
+define <3 x i64> @loaddup_str_v3i64(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldr d0, [x0]
+; CHECK-SD-NEXT: str xzr, [x0]
+; CHECK-SD-NEXT: fmov d1, d0
+; CHECK-SD-NEXT: fmov d2, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr d2, [x0]
+; CHECK-GI-NEXT: str xzr, [x0]
+; CHECK-GI-NEXT: dup v0.2d, v2.d[0]
+; CHECK-GI-NEXT: dup v2.2d, v2.d[0]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i64, ptr %p
+ %b = insertelement <3 x i64> poison, i64 %a, i64 0
+ store i64 0, ptr %p
+ %c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer
+ ret <3 x i64> %c
+}
+
define <4 x i64> @dup_v4i64(i64 %a) {
; CHECK-LABEL: dup_v4i64:
; CHECK: // %bb.0: // %entry
@@ -753,6 +1093,29 @@ entry:
ret <4 x i64> %c
}
+define <4 x i64> @loaddup_str_v4i64(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.2d }, [x0]
+; CHECK-SD-NEXT: str xzr, [x0]
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr d1, [x0]
+; CHECK-GI-NEXT: str xzr, [x0]
+; CHECK-GI-NEXT: dup v0.2d, v1.d[0]
+; CHECK-GI-NEXT: dup v1.2d, v1.d[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i64, ptr %p
+ %b = insertelement <4 x i64> poison, i64 %a, i64 0
+ store i64 0, ptr %p
+ %c = shufflevector <4 x i64> %b, <4 x i64> poison, <4 x i32> zeroinitializer
+ ret <4 x i64> %c
+}
+
define <2 x i128> @dup_v2i128(i128 %a) {
; CHECK-LABEL: dup_v2i128:
; CHECK: // %bb.0: // %entry
@@ -800,6 +1163,38 @@ entry:
ret <2 x i128> %c
}
+define <2 x i128> @loaddup_str_v2i128(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2i128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov x8, x0
+; CHECK-SD-NEXT: ldr x0, [x0]
+; CHECK-SD-NEXT: ldr x1, [x8, #8]
+; CHECK-SD-NEXT: stp xzr, xzr, [x8]
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2i128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr q1, [x0]
+; CHECK-GI-NEXT: mov v0.d[0], xzr
+; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: mov d2, v1.d[1]
+; CHECK-GI-NEXT: fmov x0, d1
+; CHECK-GI-NEXT: fmov x2, d1
+; CHECK-GI-NEXT: mov v0.d[1], xzr
+; CHECK-GI-NEXT: fmov x1, d2
+; CHECK-GI-NEXT: fmov x3, d2
+; CHECK-GI-NEXT: str q0, [x8]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i128, ptr %p
+ %b = insertelement <2 x i128> poison, i128 %a, i64 0
+ store i128 0, ptr %p
+ %c = shufflevector <2 x i128> %b, <2 x i128> poison, <2 x i32> zeroinitializer
+ ret <2 x i128> %c
+}
+
define <3 x i128> @dup_v3i128(i128 %a) {
; CHECK-LABEL: dup_v3i128:
; CHECK: // %bb.0: // %entry
@@ -855,6 +1250,41 @@ entry:
ret <3 x i128> %c
}
+define <3 x i128> @loaddup_str_v3i128(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3i128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov x8, x0
+; CHECK-SD-NEXT: ldp x0, x1, [x0]
+; CHECK-SD-NEXT: stp xzr, xzr, [x8]
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x4, x0
+; CHECK-SD-NEXT: mov x5, x1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3i128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr q1, [x0]
+; CHECK-GI-NEXT: mov v0.d[0], xzr
+; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: mov d2, v1.d[1]
+; CHECK-GI-NEXT: fmov x0, d1
+; CHECK-GI-NEXT: fmov x2, d1
+; CHECK-GI-NEXT: fmov x4, d1
+; CHECK-GI-NEXT: mov v0.d[1], xzr
+; CHECK-GI-NEXT: fmov x1, d2
+; CHECK-GI-NEXT: fmov x3, d2
+; CHECK-GI-NEXT: fmov x5, d2
+; CHECK-GI-NEXT: str q0, [x8]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i128, ptr %p
+ %b = insertelement <3 x i128> poison, i128 %a, i64 0
+ store i128 0, ptr %p
+ %c = shufflevector <3 x i128> %b, <3 x i128> poison, <3 x i32> zeroinitializer
+ ret <3 x i128> %c
+}
+
define <4 x i128> @dup_v4i128(i128 %a) {
; CHECK-LABEL: dup_v4i128:
; CHECK: // %bb.0: // %entry
@@ -918,6 +1348,45 @@ entry:
ret <4 x i128> %c
}
+define <4 x i128> @loaddup_str_v4i128(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4i128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov x8, x0
+; CHECK-SD-NEXT: ldp x0, x1, [x0]
+; CHECK-SD-NEXT: stp xzr, xzr, [x8]
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x4, x0
+; CHECK-SD-NEXT: mov x5, x1
+; CHECK-SD-NEXT: mov x6, x0
+; CHECK-SD-NEXT: mov x7, x1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4i128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr q1, [x0]
+; CHECK-GI-NEXT: mov v0.d[0], xzr
+; CHECK-GI-NEXT: mov x8, x0
+; CHECK-GI-NEXT: mov d2, v1.d[1]
+; CHECK-GI-NEXT: fmov x0, d1
+; CHECK-GI-NEXT: fmov x2, d1
+; CHECK-GI-NEXT: fmov x4, d1
+; CHECK-GI-NEXT: fmov x6, d1
+; CHECK-GI-NEXT: mov v0.d[1], xzr
+; CHECK-GI-NEXT: fmov x1, d2
+; CHECK-GI-NEXT: fmov x3, d2
+; CHECK-GI-NEXT: fmov x5, d2
+; CHECK-GI-NEXT: fmov x7, d2
+; CHECK-GI-NEXT: str q0, [x8]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load i128, ptr %p
+ %b = insertelement <4 x i128> poison, i128 %a, i64 0
+ store i128 0, ptr %p
+ %c = shufflevector <4 x i128> %b, <4 x i128> poison, <4 x i32> zeroinitializer
+ ret <4 x i128> %c
+}
+
define <2 x half> @dup_v2half(half %a) {
; CHECK-LABEL: dup_v2half:
; CHECK: // %bb.0: // %entry
@@ -953,6 +1422,28 @@ entry:
ret <2 x half> %c
}
+define <2 x half> @loaddup_str_v2half(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2half:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT: str h1, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2half:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load half, ptr %p
+ %b = insertelement <2 x half> poison, half %a, i64 0
+ store half 0.0, ptr %p
+ %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer
+ ret <2 x half> %c
+}
+
define <3 x half> @dup_v3half(half %a) {
; CHECK-LABEL: dup_v3half:
; CHECK: // %bb.0: // %entry
@@ -988,6 +1479,28 @@ entry:
ret <3 x half> %c
}
+define <3 x half> @loaddup_str_v3half(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3half:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT: str h1, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3half:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load half, ptr %p
+ %b = insertelement <3 x half> poison, half %a, i64 0
+ store half 0.0, ptr %p
+ %c = shufflevector <3 x half> %b, <3 x half> poison, <3 x i32> zeroinitializer
+ ret <3 x half> %c
+}
+
define <4 x half> @dup_v4half(half %a) {
; CHECK-LABEL: dup_v4half:
; CHECK: // %bb.0: // %entry
@@ -1023,6 +1536,28 @@ entry:
ret <4 x half> %c
}
+define <4 x half> @loaddup_str_v4half(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4half:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT: str h1, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4half:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load half, ptr %p
+ %b = insertelement <4 x half> poison, half %a, i64 0
+ store half 0.0, ptr %p
+ %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer
+ ret <4 x half> %c
+}
+
define <8 x half> @dup_v8half(half %a) {
; CHECK-LABEL: dup_v8half:
; CHECK: // %bb.0: // %entry
@@ -1057,6 +1592,28 @@ entry:
ret <8 x half> %c
}
+define <8 x half> @loaddup_str_v8half(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v8half:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT: str h1, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v8half:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load half, ptr %p
+ %b = insertelement <8 x half> poison, half %a, i64 0
+ store half 0.0, ptr %p
+ %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
+ ret <8 x half> %c
+}
+
define <16 x half> @dup_v16half(half %a) {
; CHECK-SD-LABEL: dup_v16half:
; CHECK-SD: // %bb.0: // %entry
@@ -1108,6 +1665,30 @@ entry:
ret <16 x half> %c
}
+define <16 x half> @loaddup_str_v16half(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v16half:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT: movi d2, #0000000000000000
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: str h2, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v16half:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h1, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.8h, v1.h[0]
+; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load half, ptr %p
+ %b = insertelement <16 x half> poison, half %a, i64 0
+ store half 0.0, ptr %p
+ %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer
+ ret <16 x half> %c
+}
+
define <2 x bfloat> @dup_v2bfloat(bfloat %a) {
; CHECK-LABEL: dup_v2bfloat:
; CHECK: // %bb.0: // %entry
@@ -1143,6 +1724,28 @@ entry:
ret <2 x bfloat> %c
}
+define <2 x bfloat> @loaddup_str_v2bfloat(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2bfloat:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT: str h1, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2bfloat:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load bfloat, ptr %p
+ %b = insertelement <2 x bfloat> poison, bfloat %a, i64 0
+ store bfloat 0.0, ptr %p
+ %c = shufflevector <2 x bfloat> %b, <2 x bfloat> poison, <2 x i32> zeroinitializer
+ ret <2 x bfloat> %c
+}
+
define <3 x bfloat> @dup_v3bfloat(bfloat %a) {
; CHECK-LABEL: dup_v3bfloat:
; CHECK: // %bb.0: // %entry
@@ -1178,6 +1781,28 @@ entry:
ret <3 x bfloat> %c
}
+define <3 x bfloat> @loaddup_str_v3bfloat(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3bfloat:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT: str h1, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3bfloat:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load bfloat, ptr %p
+ %b = insertelement <3 x bfloat> poison, bfloat %a, i64 0
+ store bfloat 0.0, ptr %p
+ %c = shufflevector <3 x bfloat> %b, <3 x bfloat> poison, <3 x i32> zeroinitializer
+ ret <3 x bfloat> %c
+}
+
define <4 x bfloat> @dup_v4bfloat(bfloat %a) {
; CHECK-LABEL: dup_v4bfloat:
; CHECK: // %bb.0: // %entry
@@ -1213,6 +1838,28 @@ entry:
ret <4 x bfloat> %c
}
+define <4 x bfloat> @loaddup_str_v4bfloat(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4bfloat:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: ld1r { v0.4h }, [x0]
+; CHECK-SD-NEXT: str h1, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4bfloat:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load bfloat, ptr %p
+ %b = insertelement <4 x bfloat> poison, bfloat %a, i64 0
+ store bfloat 0.0, ptr %p
+ %c = shufflevector <4 x bfloat> %b, <4 x bfloat> poison, <4 x i32> zeroinitializer
+ ret <4 x bfloat> %c
+}
+
define <8 x bfloat> @dup_v8bfloat(bfloat %a) {
; CHECK-LABEL: dup_v8bfloat:
; CHECK: // %bb.0: // %entry
@@ -1247,6 +1894,28 @@ entry:
ret <8 x bfloat> %c
}
+define <8 x bfloat> @loaddup_str_v8bfloat(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v8bfloat:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT: str h1, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v8bfloat:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load bfloat, ptr %p
+ %b = insertelement <8 x bfloat> poison, bfloat %a, i64 0
+ store bfloat 0.0, ptr %p
+ %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
+ ret <8 x bfloat> %c
+}
+
define <16 x bfloat> @dup_v16bfloat(bfloat %a) {
; CHECK-SD-LABEL: dup_v16bfloat:
; CHECK-SD: // %bb.0: // %entry
@@ -1298,6 +1967,30 @@ entry:
ret <16 x bfloat> %c
}
+define <16 x bfloat> @loaddup_str_v16bfloat(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v16bfloat:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.8h }, [x0]
+; CHECK-SD-NEXT: movi d2, #0000000000000000
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: str h2, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v16bfloat:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr h1, [x0]
+; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: dup v0.8h, v1.h[0]
+; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load bfloat, ptr %p
+ %b = insertelement <16 x bfloat> poison, bfloat %a, i64 0
+ store bfloat 0.0, ptr %p
+ %c = shufflevector <16 x bfloat> %b, <16 x bfloat> poison, <16 x i32> zeroinitializer
+ ret <16 x bfloat> %c
+}
+
define <2 x float> @dup_v2float(float %a) {
; CHECK-LABEL: dup_v2float:
; CHECK: // %bb.0: // %entry
@@ -1333,6 +2026,27 @@ entry:
ret <2 x float> %c
}
+define <2 x float> @loaddup_str_v2float(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2float:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.2s }, [x0]
+; CHECK-SD-NEXT: str wzr, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2float:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: str wzr, [x0]
+; CHECK-GI-NEXT: dup v0.2s, v0.s[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load float, ptr %p
+ %b = insertelement <2 x float> poison, float %a, i64 0
+ store float 0.0, ptr %p
+ %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer
+ ret <2 x float> %c
+}
+
define <3 x float> @dup_v3float(float %a) {
; CHECK-LABEL: dup_v3float:
; CHECK: // %bb.0: // %entry
@@ -1367,6 +2081,27 @@ entry:
ret <3 x float> %c
}
+define <3 x float> @loaddup_str_v3float(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3float:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.4s }, [x0]
+; CHECK-SD-NEXT: str wzr, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3float:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: str wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4s, v0.s[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load float, ptr %p
+ %b = insertelement <3 x float> poison, float %a, i64 0
+ store float 0.0, ptr %p
+ %c = shufflevector <3 x float> %b, <3 x float> poison, <3 x i32> zeroinitializer
+ ret <3 x float> %c
+}
+
define <4 x float> @dup_v4float(float %a) {
; CHECK-LABEL: dup_v4float:
; CHECK: // %bb.0: // %entry
@@ -1401,6 +2136,27 @@ entry:
ret <4 x float> %c
}
+define <4 x float> @loaddup_str_v4float(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4float:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.4s }, [x0]
+; CHECK-SD-NEXT: str wzr, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4float:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: str wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4s, v0.s[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load float, ptr %p
+ %b = insertelement <4 x float> poison, float %a, i64 0
+ store float 0.0, ptr %p
+ %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
+ ret <4 x float> %c
+}
+
define <8 x float> @dup_v8float(float %a) {
; CHECK-SD-LABEL: dup_v8float:
; CHECK-SD: // %bb.0: // %entry
@@ -1452,6 +2208,29 @@ entry:
ret <8 x float> %c
}
+define <8 x float> @loaddup_str_v8float(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v8float:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.4s }, [x0]
+; CHECK-SD-NEXT: str wzr, [x0]
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v8float:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr s1, [x0]
+; CHECK-GI-NEXT: str wzr, [x0]
+; CHECK-GI-NEXT: dup v0.4s, v1.s[0]
+; CHECK-GI-NEXT: dup v1.4s, v1.s[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load float, ptr %p
+ %b = insertelement <8 x float> poison, float %a, i64 0
+ store float 0.0, ptr %p
+ %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer
+ ret <8 x float> %c
+}
+
define <2 x double> @dup_v2double(double %a) {
; CHECK-LABEL: dup_v2double:
; CHECK: // %bb.0: // %entry
@@ -1486,6 +2265,27 @@ entry:
ret <2 x double> %c
}
+define <2 x double> @loaddup_str_v2double(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v2double:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.2d }, [x0]
+; CHECK-SD-NEXT: str xzr, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2double:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr d0, [x0]
+; CHECK-GI-NEXT: str xzr, [x0]
+; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load double, ptr %p
+ %b = insertelement <2 x double> poison, double %a, i64 0
+ store double 0.0, ptr %p
+ %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
+ ret <2 x double> %c
+}
+
define <3 x double> @dup_v3double(double %a) {
; CHECK-SD-LABEL: dup_v3double:
; CHECK-SD: // %bb.0: // %entry
@@ -1553,6 +2353,33 @@ entry:
ret <3 x double> %c
}
+define <3 x double> @loaddup_str_v3double(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v3double:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldr d0, [x0]
+; CHECK-SD-NEXT: str xzr, [x0]
+; CHECK-SD-NEXT: fmov d1, d0
+; CHECK-SD-NEXT: fmov d2, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3double:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr d2, [x0]
+; CHECK-GI-NEXT: str xzr, [x0]
+; CHECK-GI-NEXT: dup v0.2d, v2.d[0]
+; CHECK-GI-NEXT: dup v2.2d, v2.d[0]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load double, ptr %p
+ %b = insertelement <3 x double> poison, double %a, i64 0
+ store double 0.0, ptr %p
+ %c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer
+ ret <3 x double> %c
+}
+
define <4 x double> @dup_v4double(double %a) {
; CHECK-SD-LABEL: dup_v4double:
; CHECK-SD: // %bb.0: // %entry
@@ -1604,6 +2431,29 @@ entry:
ret <4 x double> %c
}
+define <4 x double> @loaddup_str_v4double(ptr %p) {
+; CHECK-SD-LABEL: loaddup_str_v4double:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ld1r { v0.2d }, [x0]
+; CHECK-SD-NEXT: str xzr, [x0]
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4double:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr d1, [x0]
+; CHECK-GI-NEXT: str xzr, [x0]
+; CHECK-GI-NEXT: dup v0.2d, v1.d[0]
+; CHECK-GI-NEXT: dup v1.2d, v1.d[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %a = load double, ptr %p
+ %b = insertelement <4 x double> poison, double %a, i64 0
+ store double 0.0, ptr %p
+ %c = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> zeroinitializer
+ ret <4 x double> %c
+}
+
define <2 x fp128> @dup_v2fp128(fp128 %a) {
; CHECK-LABEL: dup_v2fp128:
; CHECK: // %bb.0: // %entry
@@ -1638,6 +2488,23 @@ entry:
ret <2 x fp128> %c
}
+define <2 x fp128> @loaddup_str_v2fp128(ptr %p) {
+; CHECK-LABEL: loaddup_str_v2fp128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: adrp x8, .LCPI155_0
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI155_0]
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: str q2, [x0]
+; CHECK-NEXT: ret
+entry:
+ %a = load fp128, ptr %p
+ %b = insertelement <2 x fp128> poison, fp128 %a, i64 0
+ store fp128 0xL00000000000000000000000000000000, ptr %p
+ %c = shufflevector <2 x fp128> %b, <2 x fp128> poison, <2 x i32> zeroinitializer
+ ret <2 x fp128> %c
+}
+
define <3 x fp128> @dup_v3fp128(fp128 %a) {
; CHECK-LABEL: dup_v3fp128:
; CHECK: // %bb.0: // %entry
@@ -1675,6 +2542,24 @@ entry:
ret <3 x fp128> %c
}
+define <3 x fp128> @loaddup_str_v3fp128(ptr %p) {
+; CHECK-LABEL: loaddup_str_v3fp128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: adrp x8, .LCPI159_0
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI159_0]
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov v2.16b, v0.16b
+; CHECK-NEXT: str q3, [x0]
+; CHECK-NEXT: ret
+entry:
+ %a = load fp128, ptr %p
+ %b = insertelement <3 x fp128> poison, fp128 %a, i64 0
+ store fp128 0xL00000000000000000000000000000000, ptr %p
+ %c = shufflevector <3 x fp128> %b, <3 x fp128> poison, <3 x i32> zeroinitializer
+ ret <3 x fp128> %c
+}
+
define <4 x fp128> @dup_v4fp128(fp128 %a) {
; CHECK-LABEL: dup_v4fp128:
; CHECK: // %bb.0: // %entry
@@ -1714,3 +2599,22 @@ entry:
%c = shufflevector <4 x fp128> %b, <4 x fp128> poison, <4 x i32> zeroinitializer
ret <4 x fp128> %c
}
+
+define <4 x fp128> @loaddup_str_v4fp128(ptr %p) {
+; CHECK-LABEL: loaddup_str_v4fp128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: adrp x8, .LCPI163_0
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI163_0]
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov v2.16b, v0.16b
+; CHECK-NEXT: mov v3.16b, v0.16b
+; CHECK-NEXT: str q4, [x0]
+; CHECK-NEXT: ret
+entry:
+ %a = load fp128, ptr %p
+ %b = insertelement <4 x fp128> poison, fp128 %a, i64 0
+ store fp128 0xL00000000000000000000000000000000, ptr %p
+ %c = shufflevector <4 x fp128> %b, <4 x fp128> poison, <4 x i32> zeroinitializer
+ ret <4 x fp128> %c
+}
More information about the llvm-commits mailing list