[llvm] 54c7aec - [AArch64][RCPC3] Instruction selection for LDAP1/STL1 instructions

Lucas Prates via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 7 04:33:01 PDT 2023


Author: Lucas Prates
Date: 2023-07-07T12:32:56+01:00
New Revision: 54c7aec449c3840b4fdab001137695d5f5ba4efb

URL: https://github.com/llvm/llvm-project/commit/54c7aec449c3840b4fdab001137695d5f5ba4efb
DIFF: https://github.com/llvm/llvm-project/commit/54c7aec449c3840b4fdab001137695d5f5ba4efb.diff

LOG: [AArch64][RCPC3] Instruction selection for LDAP1/STL1 instructions

This implements the DAG patterns to enable instruction selection for the
LDAP1 and STL1 instructions from FEAT_LRCPC3. The instructions should
match the following combinations:

* Acquiring atomic load + vector insert element for LDAP1.
* Vector extract element + releasing atomic store for STL1.

Patterns have also been added to cope with the DAG structure found when
dealing with 1-lane sub-vectors.

Reviewed By: tmatheson, efriedma

Differential Revision: https://reviews.llvm.org/D153129

Added: 
    llvm/test/CodeGen/AArch64/rcpc3-sve.ll
    llvm/test/CodeGen/AArch64/rcpc3.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrAtomics.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 64629eec228935..1427886d71c073 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -542,3 +542,34 @@ let Predicates = [HasLSE] in {
   defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">;
   defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
 }
+
+// v8.9a/v9.4a FEAT_LRCPC3 patterns
+let Predicates = [HasRCPC3, HasNEON] in {
+  // LDAP1 loads
+  def : Pat<(vector_insert (v2i64 VecListOne128:$Rd),
+                (i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)), VectorIndexD:$idx),
+            (LDAP1 VecListOne128:$Rd, VectorIndexD:$idx, GPR64sp:$Rn)>;
+  def : Pat<(vector_insert (v2f64 VecListOne128:$Rd),
+                (f64 (bitconvert (i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)))), VectorIndexD:$idx),
+            (LDAP1 VecListOne128:$Rd, VectorIndexD:$idx, GPR64sp:$Rn)>;
+  def : Pat<(v1i64 (scalar_to_vector
+                (i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)))),
+            (EXTRACT_SUBREG (LDAP1 (v2i64 (IMPLICIT_DEF)), (i64 0), GPR64sp:$Rn), dsub)>;
+  def : Pat<(v1f64 (scalar_to_vector
+                (f64 (bitconvert (i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)))))),
+            (EXTRACT_SUBREG (LDAP1 (v2f64 (IMPLICIT_DEF)), (i64 0), GPR64sp:$Rn), dsub)>;
+
+  // STL1 stores
+  def : Pat<(releasing_store<atomic_store_64> GPR64sp:$Rn,
+                (i64 (vector_extract (v2i64 VecListOne128:$Vt), VectorIndexD:$idx))),
+            (STL1 VecListOne128:$Vt, VectorIndexD:$idx, GPR64sp:$Rn)>;
+  def : Pat<(releasing_store<atomic_store_64> GPR64sp:$Rn,
+                (i64 (bitconvert (f64 (vector_extract (v2f64 VecListOne128:$Vt), VectorIndexD:$idx))))),
+            (STL1 VecListOne128:$Vt, VectorIndexD:$idx, GPR64sp:$Rn)>;
+  // The v1i64 version of the vstl1_lane_* intrinsic is represented as a
+  // vector_insert -> vector_extract -> atomic store sequence, which is captured
+  // by the patterns above. We only need to cover the v1f64 case manually.
+  def : Pat<(releasing_store<atomic_store_64> GPR64sp:$Rn,
+                (i64 (bitconvert (v1f64 VecListOne64:$Vt)))),
+            (STL1 (SUBREG_TO_REG (i64 0), VecListOne64:$Vt, dsub), (i64 0), GPR64sp:$Rn)>;
+}

diff  --git a/llvm/test/CodeGen/AArch64/rcpc3-sve.ll b/llvm/test/CodeGen/AArch64/rcpc3-sve.ll
new file mode 100644
index 00000000000000..d72a9a9f76b867
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/rcpc3-sve.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+v8.9a -mattr=+sve -mattr=+rcpc3 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+v8.9a -mattr=+sve < %s | FileCheck %s
+
+; Show what happens with RCPC3 for extract/insert into SVE vectors.
+; Currently there is no RCPC3 codegen expected for this.
+
+define hidden <vscale x 2 x i64> @test_load_sve_lane0(ptr nocapture noundef readonly %a, <vscale x 2 x i64> noundef %b) local_unnamed_addr {
+; CHECK-LABEL: test_load_sve_lane0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldapr x8, [x0]
+; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    mov z0.d, p0/m, x8
+; CHECK-NEXT:    ret
+  %1 = load atomic i64, ptr %a acquire, align 8
+  %vldap1_lane = insertelement <vscale x 2 x i64> %b, i64 %1, i64 0
+  ret <vscale x 2 x i64> %vldap1_lane
+}
+
+define hidden <vscale x 2 x i64> @test_load_sve_lane1(ptr nocapture noundef readonly %a, <vscale x 2 x i64> noundef %b) local_unnamed_addr {
+; CHECK-LABEL: test_load_sve_lane1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    ldapr x9, [x0]
+; CHECK-NEXT:    index z2.d, #0, #1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z2.d, z1.d
+; CHECK-NEXT:    mov z0.d, p0/m, x9
+; CHECK-NEXT:    ret
+  %1 = load atomic i64, ptr %a acquire, align 8
+  %vldap1_lane = insertelement <vscale x 2 x i64> %b, i64 %1, i64 1
+  ret <vscale x 2 x i64> %vldap1_lane
+}
+
+define hidden void @test_store_sve_lane0(ptr nocapture noundef writeonly %a, <vscale x 2 x i64> noundef %b) local_unnamed_addr {
+; CHECK-LABEL: test_store_sve_lane0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    stlr x8, [x0]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 2 x i64> %b, i64 0
+  store atomic i64 %1, ptr %a release, align 8
+  ret void
+}
+
+define hidden void @test_store_sve_lane1(ptr nocapture noundef writeonly %a, <vscale x 2 x i64> noundef %b) local_unnamed_addr {
+; CHECK-LABEL: test_store_sve_lane1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, v0.d[1]
+; CHECK-NEXT:    stlr x8, [x0]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 2 x i64> %b, i64 1
+  store atomic i64 %1, ptr %a release, align 8
+  ret void
+}

diff  --git a/llvm/test/CodeGen/AArch64/rcpc3.ll b/llvm/test/CodeGen/AArch64/rcpc3.ll
new file mode 100644
index 00000000000000..d41613953b3c26
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/rcpc3.ll
@@ -0,0 +1,325 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+v8.9a -mattr=+rcpc3 < %s | FileCheck --check-prefixes=BOTH,RCPC3 %s
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+v8.9a < %s | FileCheck --check-prefixes=BOTH,NO-RCPC3 %s
+
+define hidden <2 x i64> @test_ldap1_2xi64_lane0(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr {
+;
+; RCPC3-LABEL: test_ldap1_2xi64_lane0:
+; RCPC3:       // %bb.0:
+; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
+; RCPC3-NEXT:    ret
+;
+; NO-RCPC3-LABEL: test_ldap1_2xi64_lane0:
+; NO-RCPC3:       // %bb.0:
+; NO-RCPC3-NEXT:    ldapr x8, [x0]
+; NO-RCPC3-NEXT:    mov v0.d[0], x8
+; NO-RCPC3-NEXT:    ret
+  %1 = load atomic i64, ptr %a acquire, align 8
+  %ldap1 = insertelement <2 x i64> %b, i64 %1, i64 0
+  ret <2 x i64> %ldap1
+}
+
+define hidden <2 x i64> @test_ldap1_2xi64_lane1(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr {
+;
+; RCPC3-LABEL: test_ldap1_2xi64_lane1:
+; RCPC3:       // %bb.0:
+; RCPC3-NEXT:    ldap1 { v0.d }[1], [x0]
+; RCPC3-NEXT:    ret
+;
+; NO-RCPC3-LABEL: test_ldap1_2xi64_lane1:
+; NO-RCPC3:       // %bb.0:
+; NO-RCPC3-NEXT:    ldapr x8, [x0]
+; NO-RCPC3-NEXT:    mov v0.d[1], x8
+; NO-RCPC3-NEXT:    ret
+  %1 = load atomic i64, ptr %a acquire, align 8
+  %ldap1 = insertelement <2 x i64> %b, i64 %1, i64 1
+  ret <2 x i64> %ldap1
+}
+
+define hidden nofpclass(nan inf) <2 x double> @test_ldap1_2xdouble_lane0(ptr nocapture noundef readonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
+;
+; RCPC3-LABEL: test_ldap1_2xdouble_lane0:
+; RCPC3:       // %bb.0:
+; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
+; RCPC3-NEXT:    ret
+;
+; NO-RCPC3-LABEL: test_ldap1_2xdouble_lane0:
+; NO-RCPC3:       // %bb.0:
+; NO-RCPC3-NEXT:    ldapr x8, [x0]
+; NO-RCPC3-NEXT:    fmov d1, x8
+; NO-RCPC3-NEXT:    mov v0.d[0], v1.d[0]
+; NO-RCPC3-NEXT:    ret
+  %1 = load atomic double, ptr %a acquire, align 8
+  %ldap1 = insertelement <2 x double> %b, double %1, i64 0
+  ret <2 x double> %ldap1
+}
+
+define hidden nofpclass(nan inf) <2 x double> @test_ldap1_2xdouble_lane1(ptr nocapture noundef readonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
+;
+; RCPC3-LABEL: test_ldap1_2xdouble_lane1:
+; RCPC3:       // %bb.0:
+; RCPC3-NEXT:    ldap1 { v0.d }[1], [x0]
+; RCPC3-NEXT:    ret
+;
+; NO-RCPC3-LABEL: test_ldap1_2xdouble_lane1:
+; NO-RCPC3:       // %bb.0:
+; NO-RCPC3-NEXT:    ldapr x8, [x0]
+; NO-RCPC3-NEXT:    fmov d1, x8
+; NO-RCPC3-NEXT:    mov v0.d[1], v1.d[0]
+; NO-RCPC3-NEXT:    ret
+  %1 = load atomic double, ptr %a acquire, align 8
+  %ldap1 = insertelement <2 x double> %b, double %1, i64 1
+  ret <2 x double> %ldap1
+}
+
+define hidden <1 x i64> @test_ldap1_1xi64_lane0(ptr nocapture noundef readonly %a, <1 x i64> noundef %b) local_unnamed_addr {
+;
+; RCPC3-LABEL: test_ldap1_1xi64_lane0:
+; RCPC3:       // %bb.0:
+; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
+; RCPC3-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; RCPC3-NEXT:    ret
+;
+; NO-RCPC3-LABEL: test_ldap1_1xi64_lane0:
+; NO-RCPC3:       // %bb.0:
+; NO-RCPC3-NEXT:    ldapr x8, [x0]
+; NO-RCPC3-NEXT:    fmov d0, x8
+; NO-RCPC3-NEXT:    ret
+  %1 = load atomic i64, ptr %a acquire, align 8
+  %ldap1 = insertelement <1 x i64> poison, i64 %1, i64 0
+  ret <1 x i64> %ldap1
+}
+
+define hidden nofpclass(nan inf) <1 x double> @test_ldap1_1xdouble_lane0(ptr nocapture noundef readonly %a, <1 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
+;
+; RCPC3-LABEL: test_ldap1_1xdouble_lane0:
+; RCPC3:       // %bb.0:
+; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
+; RCPC3-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; RCPC3-NEXT:    ret
+;
+; NO-RCPC3-LABEL: test_ldap1_1xdouble_lane0:
+; NO-RCPC3:       // %bb.0:
+; NO-RCPC3-NEXT:    ldapr x8, [x0]
+; NO-RCPC3-NEXT:    fmov d0, x8
+; NO-RCPC3-NEXT:    ret
+  %1 = load atomic double, ptr %a acquire, align 8
+  %ldap1 = insertelement <1 x double> poison, double %1, i64 0
+  ret <1 x double> %ldap1
+}
+
+define hidden void @test_stl1_2xi64_lane0(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr {
+;
+; RCPC3-LABEL: test_stl1_2xi64_lane0:
+; RCPC3:       // %bb.0:
+; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
+; RCPC3-NEXT:    ret
+;
+; NO-RCPC3-LABEL: test_stl1_2xi64_lane0:
+; NO-RCPC3:       // %bb.0:
+; NO-RCPC3-NEXT:    fmov x8, d0
+; NO-RCPC3-NEXT:    stlr x8, [x0]
+; NO-RCPC3-NEXT:    ret
+  %1 = extractelement <2 x i64> %b, i64 0
+  store atomic i64 %1, ptr %a release, align 8
+  ret void
+}
+
+define hidden void @test_stl1_2xi64_lane1(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr {
+;
+; RCPC3-LABEL: test_stl1_2xi64_lane1:
+; RCPC3:       // %bb.0:
+; RCPC3-NEXT:    stl1 { v0.d }[1], [x0]
+; RCPC3-NEXT:    ret
+;
+; NO-RCPC3-LABEL: test_stl1_2xi64_lane1:
+; NO-RCPC3:       // %bb.0:
+; NO-RCPC3-NEXT:    mov x8, v0.d[1]
+; NO-RCPC3-NEXT:    stlr x8, [x0]
+; NO-RCPC3-NEXT:    ret
+  %1 = extractelement <2 x i64> %b, i64 1
+  store atomic i64 %1, ptr %a release, align 8
+  ret void
+}
+
+define hidden void @test_stl1_2xdouble_lane0(ptr nocapture noundef writeonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
+;
+; RCPC3-LABEL: test_stl1_2xdouble_lane0:
+; RCPC3:       // %bb.0:
+; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
+; RCPC3-NEXT:    ret
+;
+; NO-RCPC3-LABEL: test_stl1_2xdouble_lane0:
+; NO-RCPC3:       // %bb.0:
+; NO-RCPC3-NEXT:    fmov x8, d0
+; NO-RCPC3-NEXT:    stlr x8, [x0]
+; NO-RCPC3-NEXT:    ret
+  %1 = extractelement <2 x double> %b, i64 0
+  store atomic double %1, ptr %a release, align 8
+  ret void
+}
+
+define hidden void @test_stl1_2xdouble_lane1(ptr nocapture noundef writeonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
+;
+; RCPC3-LABEL: test_stl1_2xdouble_lane1:
+; RCPC3:       // %bb.0:
+; RCPC3-NEXT:    stl1 { v0.d }[1], [x0]
+; RCPC3-NEXT:    ret
+;
+; NO-RCPC3-LABEL: test_stl1_2xdouble_lane1:
+; NO-RCPC3:       // %bb.0:
+; NO-RCPC3-NEXT:    mov d0, v0.d[1]
+; NO-RCPC3-NEXT:    fmov x8, d0
+; NO-RCPC3-NEXT:    stlr x8, [x0]
+; NO-RCPC3-NEXT:    ret
+  %1 = extractelement <2 x double> %b, i64 1
+  store atomic double %1, ptr %a release, align 8
+  ret void
+}
+
+define hidden void @test_stl1_1xi64_lane0(ptr nocapture noundef writeonly %a, <1 x i64> noundef %b) local_unnamed_addr {
+;
+; RCPC3-LABEL: test_stl1_1xi64_lane0:
+; RCPC3:       // %bb.0:
+; RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
+; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
+; RCPC3-NEXT:    ret
+;
+; NO-RCPC3-LABEL: test_stl1_1xi64_lane0:
+; NO-RCPC3:       // %bb.0:
+; NO-RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-RCPC3-NEXT:    fmov x8, d0
+; NO-RCPC3-NEXT:    stlr x8, [x0]
+; NO-RCPC3-NEXT:    ret
+  %1 = extractelement <1 x i64> %b, i64 0
+  store atomic i64 %1, ptr %a release, align 8
+  ret void
+}
+
+define hidden void @test_stl1_1xdouble_lane0(ptr nocapture noundef writeonly %a, <1 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
+;
+; RCPC3-LABEL: test_stl1_1xdouble_lane0:
+; RCPC3:       // %bb.0:
+; RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
+; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
+; RCPC3-NEXT:    ret
+;
+; NO-RCPC3-LABEL: test_stl1_1xdouble_lane0:
+; NO-RCPC3:       // %bb.0:
+; NO-RCPC3-NEXT:    fmov x8, d0
+; NO-RCPC3-NEXT:    stlr x8, [x0]
+; NO-RCPC3-NEXT:    ret
+  %1 = extractelement <1 x double> %b, i64 0
+  store atomic double %1, ptr %a release, align 8
+  ret void
+}
+
+; The remaining tests do not have any particular RCPC3-specific codegen:
+
+; load-acquire a plain non-vector double value
+define hidden double @test_double_load(ptr nocapture noundef readonly %a) local_unnamed_addr {
+; BOTH-LABEL: test_double_load:
+; BOTH:       // %bb.0:
+; BOTH-NEXT:    ldapr x8, [x0]
+; BOTH-NEXT:    fmov d0, x8
+; BOTH-NEXT:    ret
+  %1 = load atomic double, ptr %a acquire, align 8
+  ret double %1
+}
+
+; store-release a plain non-vector double value
+define hidden void @test_double_store(ptr nocapture noundef writeonly %a, double noundef %b) local_unnamed_addr {
+; BOTH-LABEL: test_double_store:
+; BOTH:       // %bb.0:
+; BOTH-NEXT:    fmov x8, d0
+; BOTH-NEXT:    stlr x8, [x0]
+; BOTH-NEXT:    ret
+  store atomic double %b, ptr %a release, align 8
+  ret void
+}
+
+; load-acquire an i64, followed by a bitcast to a 64-bit vector
+define hidden <2 x i32> @test_load_i64_bitcast_2xi32(ptr nocapture noundef readonly %a) local_unnamed_addr {
+; BOTH-LABEL: test_load_i64_bitcast_2xi32:
+; BOTH:       // %bb.0:
+; BOTH-NEXT:    ldapr x8, [x0]
+; BOTH-NEXT:    fmov d0, x8
+; BOTH-NEXT:    ret
+  %1 = load atomic i64, ptr %a acquire, align 8
+  %2 = bitcast i64 %1 to <2 x i32>
+  ret <2 x i32> %2
+}
+
+; bitcast from a 64-bit vector, followed by a store-release of the i64
+define hidden void @test_bitcast_2xi32_store_i64(ptr nocapture noundef readonly %a, <2 x i32> noundef %b) local_unnamed_addr {
+; BOTH-LABEL: test_bitcast_2xi32_store_i64:
+; BOTH:       // %bb.0:
+; BOTH-NEXT:    fmov x8, d0
+; BOTH-NEXT:    stlr x8, [x0]
+; BOTH-NEXT:    ret
+  %1 = bitcast <2 x i32> %b to i64
+  store atomic i64 %1, ptr %a release, align 8
+  ret void
+}
+
+; (non-atomic) load a 64-bit vector
+define hidden <2 x i32> @test_load_2xi32(ptr nocapture noundef readonly %a) local_unnamed_addr {
+; BOTH-LABEL: test_load_2xi32:
+; BOTH:       // %bb.0:
+; BOTH-NEXT:    ldr d0, [x0]
+; BOTH-NEXT:    ret
+  %1 = load <2 x i32>, ptr %a, align 8
+  ret <2 x i32> %1
+}
+
+; (non-atomic) store a 64-bit vector
+define hidden void @test_store_2xi32(ptr nocapture noundef writeonly %a, <2 x i32> noundef %b) local_unnamed_addr {
+; BOTH-LABEL: test_store_2xi32:
+; BOTH:       // %bb.0:
+; BOTH-NEXT:    str d0, [x0]
+; BOTH-NEXT:    ret
+  store <2 x i32> %b, ptr %a, align 8
+  ret void
+}
+
+; (non-atomic) load a 64-bit vector
+define hidden <1 x i64> @test_load_1xi64(ptr nocapture noundef readonly %a) local_unnamed_addr {
+; BOTH-LABEL: test_load_1xi64:
+; BOTH:       // %bb.0:
+; BOTH-NEXT:    ldr d0, [x0]
+; BOTH-NEXT:    ret
+  %1 = load <1 x i64>, ptr %a, align 8
+  ret <1 x i64> %1
+}
+
+; (non-atomic) store a 64-bit vector
+define hidden void @test_store_1xi64(ptr nocapture noundef writeonly %a, <1 x i64> noundef %b) local_unnamed_addr {
+; BOTH-LABEL: test_store_1xi64:
+; BOTH:       // %bb.0:
+; BOTH-NEXT:    str d0, [x0]
+; BOTH-NEXT:    ret
+  store <1 x i64> %b, ptr %a, align 8
+  ret void
+}
+
+; (non-atomic) load a 64-bit value and insert into vector
+define hidden <2 x i64> @test_load_insert_2xi64(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr {
+; BOTH-LABEL: test_load_insert_2xi64:
+; BOTH:       // %bb.0:
+; BOTH-NEXT:    ld1 { v0.d }[0], [x0]
+; BOTH-NEXT:    ret
+  %1 = load i64, ptr %a, align 8
+  %2 = insertelement <2 x i64> %b, i64 %1, i64 0
+  ret <2 x i64> %2
+}
+
+; extract from vector and (non-atomic) store a 64-bit value
+define hidden void @test_extract_store_2xi64(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr {
+; BOTH-LABEL: test_extract_store_2xi64:
+; BOTH:       // %bb.0:
+; BOTH-NEXT:    st1 { v0.d }[1], [x0]
+; BOTH-NEXT:    ret
+  %1 = extractelement <2 x i64> %b, i64 1
+  store i64 %1, ptr %a, align 8
+  ret void
+}


        


More information about the llvm-commits mailing list