[llvm] [AArch64] Fixes for BigEndian 128bit volatile, atomic and non-temporal loads/stores (PR #67413)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 26 03:09:57 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

<details>
<summary>Changes</summary>

This fixes up the generation of 128bit atomic, volatile and non-temporal loads/stores, under the assumption that their operands should usually be the same as standard loads/stores.
https://godbolt.org/z/xxc89eMKE

Non-temporal stores were disabled under BE to keep things simple, bringing them in line with the LE versions. Atomic and volatile STP and LDP nodes have their operands swapped to make sure they end up storing/loading data in the same order as the non-atomic/volatile versions.

Fixes #<!-- -->64580

---

Patch is 23.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/67413.diff


9 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+12-7) 
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2.ll (+8-8) 
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2_lse128.ll (+8-8) 
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll (+8-8) 
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2.ll (+4-4) 
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2_lse128.ll (+2-2) 
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll (+4-4) 
- (modified) llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll (+11-18) 
- (modified) llvm/test/CodeGen/AArch64/nontemporal.ll (+22-86) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3de6bd1ec94a82a..3199a971d13859a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5705,11 +5705,11 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
     // legalization will break up 256 bit inputs.
     ElementCount EC = MemVT.getVectorElementCount();
     if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
-        EC.isKnownEven() &&
-        ((MemVT.getScalarSizeInBits() == 8u ||
-          MemVT.getScalarSizeInBits() == 16u ||
-          MemVT.getScalarSizeInBits() == 32u ||
-          MemVT.getScalarSizeInBits() == 64u))) {
+        EC.isKnownEven() && DAG.getDataLayout().isLittleEndian() &&
+        (MemVT.getScalarSizeInBits() == 8u ||
+         MemVT.getScalarSizeInBits() == 16u ||
+         MemVT.getScalarSizeInBits() == 32u ||
+         MemVT.getScalarSizeInBits() == 64u)) {
       SDValue Lo =
           DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
                       MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
@@ -5769,6 +5769,8 @@ SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
   SDLoc DL(Op);
   auto StoreValue = DAG.SplitScalar(Value, DL, MVT::i64, MVT::i64);
   unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
+  if (DAG.getDataLayout().isBigEndian())
+    std::swap(StoreValue.first, StoreValue.second);
   SDValue Result = DAG.getMemIntrinsicNode(
       Opcode, DL, DAG.getVTList(MVT::Other),
       {StoreNode->getChain(), StoreValue.first, StoreValue.second,
@@ -24169,8 +24171,11 @@ void AArch64TargetLowering::ReplaceNodeResults(
           {LoadNode->getChain(), LoadNode->getBasePtr()},
           LoadNode->getMemoryVT(), LoadNode->getMemOperand());
 
-      SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
-                                 Result.getValue(0), Result.getValue(1));
+      unsigned FirstRes = DAG.getDataLayout().isBigEndian() ? 1 : 0;
+
+      SDValue Pair =
+          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
+                      Result.getValue(FirstRes), Result.getValue(1 - FirstRes));
       Results.append({Pair, Result.getValue(2) /* Chain */});
     }
     return;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2.ll
index 2fd70537a394647..08f7dd32c128079 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2.ll
@@ -229,35 +229,35 @@ define dso_local i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_unordered:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_unordered_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_monotonic:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_acquire:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ishld
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
@@ -265,7 +265,7 @@ define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_acquire_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ishld
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
@@ -273,7 +273,7 @@ define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr)
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_seq_cst:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ish
     %r = load atomic i128, ptr %ptr seq_cst, align 16
     ret i128 %r
@@ -281,7 +281,7 @@ define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ish
     %r = load atomic i128, ptr %ptr seq_cst, align 16
     ret i128 %r
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2_lse128.ll
index 32c7507d1ce706b..74e612d59858fd6 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2_lse128.ll
@@ -229,35 +229,35 @@ define dso_local i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_unordered:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_unordered_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_monotonic:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_acquire:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ishld
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
@@ -265,7 +265,7 @@ define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_acquire_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ishld
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
@@ -273,7 +273,7 @@ define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr)
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_seq_cst:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ish
     %r = load atomic i128, ptr %ptr seq_cst, align 16
     ret i128 %r
@@ -281,7 +281,7 @@ define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ish
     %r = load atomic i128, ptr %ptr seq_cst, align 16
     ret i128 %r
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll
index a09b4c69755d599..de83b702d988ac6 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll
@@ -229,49 +229,49 @@ define dso_local i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_unordered:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_unordered_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_monotonic:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_acquire:
-; CHECK:    ldiapp x1, x0, [x0]
+; CHECK:    ldiapp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_acquire_const:
-; CHECK:    ldiapp x1, x0, [x0]
+; CHECK:    ldiapp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_seq_cst:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ish
     %r = load atomic i128, ptr %ptr seq_cst, align 16
     ret i128 %r
@@ -279,7 +279,7 @@ define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ish
     %r = load atomic i128, ptr %ptr seq_cst, align 16
     ret i128 %r
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2.ll
index 5652cc52f024a99..c9c9de4f884b7cf 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2.ll
@@ -117,14 +117,14 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
 
 define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_unordered:
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr unordered, align 16
     ret void
 }
 
 define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr monotonic, align 16
     ret void
 }
@@ -132,7 +132,7 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
 define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_release:
 ; CHECK:    dmb ish
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr release, align 16
     ret void
 }
@@ -140,7 +140,7 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
 define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_seq_cst:
 ; CHECK:    dmb ish
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
 ; CHECK:    dmb ish
     store atomic i128 %value, ptr %ptr seq_cst, align 16
     ret void
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2_lse128.ll
index 59b5a1aa038ab58..29d6b15c09022ca 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2_lse128.ll
@@ -117,14 +117,14 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
 
 define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_unordered:
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr unordered, align 16
     ret void
 }
 
 define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr monotonic, align 16
     ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll
index f8f8fe7cd6fa557..84a1f38d423c254 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll
@@ -117,21 +117,21 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
 
 define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_unordered:
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr unordered, align 16
     ret void
 }
 
 define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr monotonic, align 16
     ret void
 }
 
 define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_release:
-; CHECK:    stilp x1, x0, [x2]
+; CHECK:    stilp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr release, align 16
     ret void
 }
@@ -139,7 +139,7 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
 define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_seq_cst:
 ; CHECK:    dmb ish
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
 ; CHECK:    dmb ish
     store atomic i128 %value, ptr %ptr seq_cst, align 16
     ret void
diff --git a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
index c3a03b2cb35426b..302eaeb98540b4f 100644
--- a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
+++ b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
@@ -129,15 +129,10 @@ entry:
 }
 
 define i128 @load_vol(i32, i32, ptr %p) {
-; CHECK-LE-LABEL: load_vol:
-; CHECK-LE:       // %bb.0: // %entry
-; CHECK-LE-NEXT:    ldp x0, x1, [x2]
-; CHECK-LE-NEXT:    ret
-;
-; CHECK-BE-LABEL: load_vol:
-; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    ldp x1, x0, [x2]
-; CHECK-BE-NEXT:    ret
+; CHECK-LABEL: load_vol:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldp x0, x1, [x2]
+; CHECK-NEXT:    ret
 entry:
   %l = load volatile i128, ptr %p, align 16
   ret i128 %l
@@ -154,16 +149,14 @@ entry:
 }
 
 define void @loadstore_vol(i128 %a, ptr %p) {
-; CHECK-LE-LABEL: loadstore_vol:
-; CHECK-LE:       // %bb.0: // %entry
-; CHECK-LE-NEXT:    stp x0, x1, [x2]
-; CHECK-LE-NEXT:    ret
-;
-; CHECK-BE-LABEL: loadstore_vol:
-; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    stp x1, x0, [x2]
-; CHECK-BE-NEXT:    ret
+; CHECK-LABEL: loadstore_vol:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x0, x1, [x2]
+; CHECK-NEXT:    ret
 entry:
   store volatile i128 %a, ptr %p, align 16
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-BE: {{.*}}
+; CHECK-LE: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll
index fe19ca7e2cc43d1..f8ba150a0405ff2 100644
--- a/llvm/test/CodeGen/AArch64/nontemporal.ll
+++ b/llvm/test/CodeGen/AArch64/nontemporal.ll
@@ -10,9 +10,7 @@ define void @test_stnp_v4i64(ptr %p, <4 x i64> %v) #0 {
 ;
 ; CHECK-BE-LABEL: test_stnp_v4i64:
 ; CHECK-BE:       // %bb.0:
-; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q0, q1, [x0]
 ; CHECK-BE-NEXT:    ret
   store <4 x i64> %v, ptr %p, align 1, !nontemporal !0
   ret void
@@ -565,11 +563,7 @@ define void @test_stnp_v32i8(<32 x i8> %v, ptr %ptr) {
 ;
 ; CHECK-BE-LABEL: test_stnp_v32i8:
 ; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    rev64 v1.16b, v1.16b
-; CHECK-BE-NEXT:    rev64 v0.16b, v0.16b
-; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q0, q1, [x0]
 ; CHECK-BE-NEXT:    ret
 entry:
   store <32 x i8> %v, ptr %ptr, align 4, !nontemporal !0
@@ -585,16 +579,8 @@ define void @test_stnp_v32i16(<32 x i16> %v, ptr %ptr) {
 ;
 ; CHECK-BE-LABEL: test_stnp_v32i16:
 ; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    rev64 v3.8h, v3.8h
-; CHECK-BE-NEXT:    rev64 v2.8h, v2.8h
-; CHECK-BE-NEXT:    rev64 v1.8h, v1.8h
-; CHECK-BE-NEXT:    rev64 v0.8h, v0.8h
-; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
-; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
 ; CHECK-BE-NEXT:    ret
 entry:
   store <32 x i16> %v, ptr %ptr, align 4, !nontemporal !0
@@ -610,16 +596,8 @@ define void @test_stnp_v32f16(<32 x half> %v, ptr %ptr) {
 ;
 ; CHECK-BE-LABEL: test_stnp_v32f16:
 ; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    rev64 v3.8h, v3.8h
-; CHECK-BE-NEXT:    rev64 v2.8h, v2.8h
-; CHECK-BE-NEXT:    rev64 v1.8h, v1.8h
-; CHECK-BE-NEXT:    rev64 v0.8h, v0.8h
-; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
-; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
 ; CHECK-BE-NEXT:    ret
 entry:
   store <32 x half> %v, ptr %ptr, align 4, !nontemporal !0
@@ -635,16 +613,8 @@ define void @test_stnp_v16i32(<16 x i32> %v, ptr %ptr) {
 ;
 ; CHECK-BE-LABEL: test_stnp_v16i32:
 ; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    rev64 v3.4s, v3.4s
-; CHECK-BE-NEXT:    rev64 v2.4s, v2.4s
-; CHECK-BE-NEXT:    rev64 v1.4s, v1.4s
-; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
-; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
-; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
 ; CHECK-BE-NEXT:    ret
 entry:
   store <16 x i32> %v, ptr %ptr, align 4, !nontemporal !0
@@ -660,16...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/67413


More information about the llvm-commits mailing list