[llvm] 1610311 - [AArch64] Fixes for BigEndian 128bit volatile, atomic and non-temporal loads/stores

David Green via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 29 09:21:25 PDT 2023


Author: David Green
Date: 2023-09-29T17:21:19+01:00
New Revision: 1610311a95b1a98f47e9242d67141c5b3e44a138

URL: https://github.com/llvm/llvm-project/commit/1610311a95b1a98f47e9242d67141c5b3e44a138
DIFF: https://github.com/llvm/llvm-project/commit/1610311a95b1a98f47e9242d67141c5b3e44a138.diff

LOG: [AArch64] Fixes for BigEndian 128bit volatile, atomic and non-temporal loads/stores

This fixes up the generation of 128-bit atomic, volatile and non-temporal
loads/stores, under the assumption that they should usually be the same as
the standard versions.
https://godbolt.org/z/xxc89eMKE

Fixes #64580
Closes #67413
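
As a rough illustration (not part of the commit), the 128-bit volatile and
atomic cases correspond to source along the following lines; the function
names are invented and the snippet assumes the GCC/Clang __int128 extension:

    #include <atomic>

    using i128 = __int128; // GCC/Clang extension

    // Volatile 128-bit load, cf. load_vol in i128_volatile_load_store.ll.
    i128 load_vol(volatile i128 *p) { return *p; }

    // 128-bit atomic load/store, cf. the aarch64_be-atomic-*.ll tests.
    i128 load_acq(std::atomic<i128> *p) {
      return p->load(std::memory_order_acquire);
    }
    void store_rel(std::atomic<i128> *p, i128 v) {
      p->store(v, std::memory_order_release);
    }

Whether the atomic accesses stay inline as LDP/STP/STILP or become library
calls depends on the targeted atomics features; the tests below cover the
+lse2, +lse2+lse128 and +rcpc3 configurations.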

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2_lse128.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2_lse128.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll
    llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
    llvm/test/CodeGen/AArch64/nontemporal.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 06fe64d10a1c13e..8e5ded8dd47aa15 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5705,11 +5705,11 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
     // legalization will break up 256 bit inputs.
     ElementCount EC = MemVT.getVectorElementCount();
     if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
-        EC.isKnownEven() &&
-        ((MemVT.getScalarSizeInBits() == 8u ||
-          MemVT.getScalarSizeInBits() == 16u ||
-          MemVT.getScalarSizeInBits() == 32u ||
-          MemVT.getScalarSizeInBits() == 64u))) {
+        EC.isKnownEven() && DAG.getDataLayout().isLittleEndian() &&
+        (MemVT.getScalarSizeInBits() == 8u ||
+         MemVT.getScalarSizeInBits() == 16u ||
+         MemVT.getScalarSizeInBits() == 32u ||
+         MemVT.getScalarSizeInBits() == 64u)) {
       SDValue Lo =
           DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
                       MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
@@ -5769,6 +5769,8 @@ SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
   SDLoc DL(Op);
   auto StoreValue = DAG.SplitScalar(Value, DL, MVT::i64, MVT::i64);
   unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
+  if (DAG.getDataLayout().isBigEndian())
+    std::swap(StoreValue.first, StoreValue.second);
   SDValue Result = DAG.getMemIntrinsicNode(
       Opcode, DL, DAG.getVTList(MVT::Other),
       {StoreNode->getChain(), StoreValue.first, StoreValue.second,
@@ -24162,8 +24164,11 @@ void AArch64TargetLowering::ReplaceNodeResults(
           {LoadNode->getChain(), LoadNode->getBasePtr()},
           LoadNode->getMemoryVT(), LoadNode->getMemOperand());
 
-      SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
-                                 Result.getValue(0), Result.getValue(1));
+      unsigned FirstRes = DAG.getDataLayout().isBigEndian() ? 1 : 0;
+
+      SDValue Pair =
+          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
+                      Result.getValue(FirstRes), Result.getValue(1 - FirstRes));
       Results.append({Pair, Result.getValue(2) /* Chain */});
     }
     return;
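
The core of the fix is the operand order for the register-pair lowering:
STP/LDP always transfer their first operand at the lower address, so on a
big-endian target the high 64 bits of the i128 must occupy the first slot.
A minimal standalone sketch of that ordering (plain C++ mirroring the logic
above, not the LLVM API; it models only which half lands at which address,
not the byte order within each half):

    #include <cstdint>
    #include <utility>

    // Mirrors DAG.SplitScalar: {low 64 bits, high 64 bits} of the value.
    static std::pair<std::uint64_t, std::uint64_t> splitI128(unsigned __int128 V) {
      return {static_cast<std::uint64_t>(V), static_cast<std::uint64_t>(V >> 64)};
    }

    // Models an STP of the pair: first operand at the lower address,
    // second operand eight bytes above it.
    static void storePair(std::uint64_t First, std::uint64_t Second,
                          std::uint64_t Slots[2]) {
      Slots[0] = First;
      Slots[1] = Second;
    }

    static void lowerStore128(unsigned __int128 V, std::uint64_t Slots[2],
                              bool BigEndian) {
      auto Halves = splitI128(V);
      if (BigEndian)
        std::swap(Halves.first, Halves.second); // high half to the lower address
      storePair(Halves.first, Halves.second, Slots);
    }

The load side in ReplaceNodeResults is the mirror image: on big-endian the
first result of the 128-bit load carries the high half, hence the swapped
BUILD_PAIR operands.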

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2.ll
index 2fd70537a394647..08f7dd32c128079 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2.ll
@@ -229,35 +229,35 @@ define dso_local i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_unordered:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_unordered_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_monotonic:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_acquire:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ishld
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
@@ -265,7 +265,7 @@ define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_acquire_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ishld
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
@@ -273,7 +273,7 @@ define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr)
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_seq_cst:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ish
     %r = load atomic i128, ptr %ptr seq_cst, align 16
     ret i128 %r
@@ -281,7 +281,7 @@ define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ish
     %r = load atomic i128, ptr %ptr seq_cst, align 16
     ret i128 %r

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2_lse128.ll
index 32c7507d1ce706b..74e612d59858fd6 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2_lse128.ll
@@ -229,35 +229,35 @@ define dso_local i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_unordered:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_unordered_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_monotonic:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_acquire:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ishld
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
@@ -265,7 +265,7 @@ define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_acquire_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ishld
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
@@ -273,7 +273,7 @@ define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr)
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_seq_cst:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ish
     %r = load atomic i128, ptr %ptr seq_cst, align 16
     ret i128 %r
@@ -281,7 +281,7 @@ define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ish
     %r = load atomic i128, ptr %ptr seq_cst, align 16
     ret i128 %r

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll
index a09b4c69755d599..de83b702d988ac6 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll
@@ -229,49 +229,49 @@ define dso_local i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_unordered:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_unordered_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr unordered, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_monotonic:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr monotonic, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_acquire:
-; CHECK:    ldiapp x1, x0, [x0]
+; CHECK:    ldiapp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_acquire_const:
-; CHECK:    ldiapp x1, x0, [x0]
+; CHECK:    ldiapp x0, x1, [x0]
     %r = load atomic i128, ptr %ptr acquire, align 16
     ret i128 %r
 }
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_seq_cst:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ish
     %r = load atomic i128, ptr %ptr seq_cst, align 16
     ret i128 %r
@@ -279,7 +279,7 @@ define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
 
 define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
 ; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; CHECK:    ldp x1, x0, [x0]
+; CHECK:    ldp x0, x1, [x0]
 ; CHECK:    dmb ish
     %r = load atomic i128, ptr %ptr seq_cst, align 16
     ret i128 %r

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2.ll
index 5652cc52f024a99..c9c9de4f884b7cf 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2.ll
@@ -117,14 +117,14 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
 
 define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_unordered:
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr unordered, align 16
     ret void
 }
 
 define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr monotonic, align 16
     ret void
 }
@@ -132,7 +132,7 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
 define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_release:
 ; CHECK:    dmb ish
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr release, align 16
     ret void
 }
@@ -140,7 +140,7 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
 define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_seq_cst:
 ; CHECK:    dmb ish
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
 ; CHECK:    dmb ish
     store atomic i128 %value, ptr %ptr seq_cst, align 16
     ret void

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2_lse128.ll
index 59b5a1aa038ab58..29d6b15c09022ca 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2_lse128.ll
@@ -117,14 +117,14 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
 
 define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_unordered:
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr unordered, align 16
     ret void
 }
 
 define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr monotonic, align 16
     ret void
 }

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll
index f8f8fe7cd6fa557..84a1f38d423c254 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll
@@ -117,21 +117,21 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
 
 define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_unordered:
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr unordered, align 16
     ret void
 }
 
 define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr monotonic, align 16
     ret void
 }
 
 define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_release:
-; CHECK:    stilp x1, x0, [x2]
+; CHECK:    stilp x0, x1, [x2]
     store atomic i128 %value, ptr %ptr release, align 16
     ret void
 }
@@ -139,7 +139,7 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
 define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
 ; CHECK-LABEL: store_atomic_i128_aligned_seq_cst:
 ; CHECK:    dmb ish
-; CHECK:    stp x1, x0, [x2]
+; CHECK:    stp x0, x1, [x2]
 ; CHECK:    dmb ish
     store atomic i128 %value, ptr %ptr seq_cst, align 16
     ret void

diff --git a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
index c3a03b2cb35426b..302eaeb98540b4f 100644
--- a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
+++ b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
@@ -129,15 +129,10 @@ entry:
 }
 
 define i128 @load_vol(i32, i32, ptr %p) {
-; CHECK-LE-LABEL: load_vol:
-; CHECK-LE:       // %bb.0: // %entry
-; CHECK-LE-NEXT:    ldp x0, x1, [x2]
-; CHECK-LE-NEXT:    ret
-;
-; CHECK-BE-LABEL: load_vol:
-; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    ldp x1, x0, [x2]
-; CHECK-BE-NEXT:    ret
+; CHECK-LABEL: load_vol:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldp x0, x1, [x2]
+; CHECK-NEXT:    ret
 entry:
   %l = load volatile i128, ptr %p, align 16
   ret i128 %l
@@ -154,16 +149,14 @@ entry:
 }
 
 define void @loadstore_vol(i128 %a, ptr %p) {
-; CHECK-LE-LABEL: loadstore_vol:
-; CHECK-LE:       // %bb.0: // %entry
-; CHECK-LE-NEXT:    stp x0, x1, [x2]
-; CHECK-LE-NEXT:    ret
-;
-; CHECK-BE-LABEL: loadstore_vol:
-; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    stp x1, x0, [x2]
-; CHECK-BE-NEXT:    ret
+; CHECK-LABEL: loadstore_vol:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x0, x1, [x2]
+; CHECK-NEXT:    ret
 entry:
   store volatile i128 %a, ptr %p, align 16
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-BE: {{.*}}
+; CHECK-LE: {{.*}}

diff --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll
index fe19ca7e2cc43d1..f8ba150a0405ff2 100644
--- a/llvm/test/CodeGen/AArch64/nontemporal.ll
+++ b/llvm/test/CodeGen/AArch64/nontemporal.ll
@@ -10,9 +10,7 @@ define void @test_stnp_v4i64(ptr %p, <4 x i64> %v) #0 {
 ;
 ; CHECK-BE-LABEL: test_stnp_v4i64:
 ; CHECK-BE:       // %bb.0:
-; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q0, q1, [x0]
 ; CHECK-BE-NEXT:    ret
   store <4 x i64> %v, ptr %p, align 1, !nontemporal !0
   ret void
@@ -565,11 +563,7 @@ define void @test_stnp_v32i8(<32 x i8> %v, ptr %ptr) {
 ;
 ; CHECK-BE-LABEL: test_stnp_v32i8:
 ; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    rev64 v1.16b, v1.16b
-; CHECK-BE-NEXT:    rev64 v0.16b, v0.16b
-; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q0, q1, [x0]
 ; CHECK-BE-NEXT:    ret
 entry:
   store <32 x i8> %v, ptr %ptr, align 4, !nontemporal !0
@@ -585,16 +579,8 @@ define void @test_stnp_v32i16(<32 x i16> %v, ptr %ptr) {
 ;
 ; CHECK-BE-LABEL: test_stnp_v32i16:
 ; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    rev64 v3.8h, v3.8h
-; CHECK-BE-NEXT:    rev64 v2.8h, v2.8h
-; CHECK-BE-NEXT:    rev64 v1.8h, v1.8h
-; CHECK-BE-NEXT:    rev64 v0.8h, v0.8h
-; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
-; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
 ; CHECK-BE-NEXT:    ret
 entry:
   store <32 x i16> %v, ptr %ptr, align 4, !nontemporal !0
@@ -610,16 +596,8 @@ define void @test_stnp_v32f16(<32 x half> %v, ptr %ptr) {
 ;
 ; CHECK-BE-LABEL: test_stnp_v32f16:
 ; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    rev64 v3.8h, v3.8h
-; CHECK-BE-NEXT:    rev64 v2.8h, v2.8h
-; CHECK-BE-NEXT:    rev64 v1.8h, v1.8h
-; CHECK-BE-NEXT:    rev64 v0.8h, v0.8h
-; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
-; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
 ; CHECK-BE-NEXT:    ret
 entry:
   store <32 x half> %v, ptr %ptr, align 4, !nontemporal !0
@@ -635,16 +613,8 @@ define void @test_stnp_v16i32(<16 x i32> %v, ptr %ptr) {
 ;
 ; CHECK-BE-LABEL: test_stnp_v16i32:
 ; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    rev64 v3.4s, v3.4s
-; CHECK-BE-NEXT:    rev64 v2.4s, v2.4s
-; CHECK-BE-NEXT:    rev64 v1.4s, v1.4s
-; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
-; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
-; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
 ; CHECK-BE-NEXT:    ret
 entry:
   store <16 x i32> %v, ptr %ptr, align 4, !nontemporal !0
@@ -660,16 +630,8 @@ define void @test_stnp_v16f32(<16 x float> %v, ptr %ptr) {
 ;
 ; CHECK-BE-LABEL: test_stnp_v16f32:
 ; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    rev64 v3.4s, v3.4s
-; CHECK-BE-NEXT:    rev64 v2.4s, v2.4s
-; CHECK-BE-NEXT:    rev64 v1.4s, v1.4s
-; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
-; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
-; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
 ; CHECK-BE-NEXT:    ret
 entry:
   store <16 x float> %v, ptr %ptr, align 4, !nontemporal !0
@@ -776,20 +738,10 @@ define void @test_stnp_v16i32_invalid_offset(<16 x i32> %v, ptr %ptr) {
 ;
 ; CHECK-BE-LABEL: test_stnp_v16i32_invalid_offset:
 ; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    rev64 v3.4s, v3.4s
-; CHECK-BE-NEXT:    rev64 v2.4s, v2.4s
-; CHECK-BE-NEXT:    mov w8, #32032 // =0x7d20
-; CHECK-BE-NEXT:    rev64 v1.4s, v1.4s
-; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
-; CHECK-BE-NEXT:    mov w9, #32000 // =0x7d00
-; CHECK-BE-NEXT:    add x8, x0, x8
-; CHECK-BE-NEXT:    add x9, x0, x9
-; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
-; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    stnp q2, q3, [x8]
-; CHECK-BE-NEXT:    stnp q0, q1, [x9]
+; CHECK-BE-NEXT:    str q3, [x0, #32048]
+; CHECK-BE-NEXT:    str q2, [x0, #32032]
+; CHECK-BE-NEXT:    str q1, [x0, #32016]
+; CHECK-BE-NEXT:    str q0, [x0, #32000]
 ; CHECK-BE-NEXT:    ret
 entry:
   %gep = getelementptr <16 x i32>, ptr %ptr, i32 500
@@ -808,18 +760,10 @@ define void @test_stnp_v16f64(<16 x double> %v, ptr %ptr) {
 ;
 ; CHECK-BE-LABEL: test_stnp_v16f64:
 ; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    ext v7.16b, v7.16b, v7.16b, #8
-; CHECK-BE-NEXT:    ext v6.16b, v6.16b, v6.16b, #8
-; CHECK-BE-NEXT:    ext v5.16b, v5.16b, v5.16b, #8
-; CHECK-BE-NEXT:    ext v4.16b, v4.16b, v4.16b, #8
-; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
-; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    stnp q6, q7, [x0, #96]
-; CHECK-BE-NEXT:    stnp q4, q5, [x0, #64]
-; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
+; CHECK-BE-NEXT:    stp q4, q5, [x0, #64]
+; CHECK-BE-NEXT:    stp q6, q7, [x0, #96]
 ; CHECK-BE-NEXT:    ret
 entry:
   store <16 x double> %v, ptr %ptr, align 4, !nontemporal !0
@@ -837,18 +781,10 @@ define void @test_stnp_v16i64(<16 x i64> %v, ptr %ptr) {
 ;
 ; CHECK-BE-LABEL: test_stnp_v16i64:
 ; CHECK-BE:       // %bb.0: // %entry
-; CHECK-BE-NEXT:    ext v7.16b, v7.16b, v7.16b, #8
-; CHECK-BE-NEXT:    ext v6.16b, v6.16b, v6.16b, #8
-; CHECK-BE-NEXT:    ext v5.16b, v5.16b, v5.16b, #8
-; CHECK-BE-NEXT:    ext v4.16b, v4.16b, v4.16b, #8
-; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
-; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT:    stnp q6, q7, [x0, #96]
-; CHECK-BE-NEXT:    stnp q4, q5, [x0, #64]
-; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q0, q1, [x0]
+; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
+; CHECK-BE-NEXT:    stp q4, q5, [x0, #64]
+; CHECK-BE-NEXT:    stp q6, q7, [x0, #96]
 ; CHECK-BE-NEXT:    ret
 entry:
   store <16 x i64> %v, ptr %ptr, align 4, !nontemporal !0
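
For the non-temporal vector path, the LowerSTORE change above only splits a
256-bit store into an STNP when the layout is little-endian, so big-endian
now falls back to ordinary paired q-register stores, as the updated CHECK-BE
lines show. A hypothetical reproducer (not from the commit), assuming Clang's
vector extensions and the __builtin_nontemporal_store builtin:

    typedef long long v4i64 __attribute__((vector_size(32))); // 256-bit vector

    void store_nt(v4i64 *p, v4i64 v) {
      __builtin_nontemporal_store(v, p); // 256-bit non-temporal store
    }

On aarch64_be this is expected to lower to stp of q registers rather than to
stnp preceded by element-reversal shuffles, matching the test_stnp_v4i64
CHECK-BE lines above.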


        

