[llvm] [AArch64][GlobalISel] Add support for pre-indexed loads/stores. (PR #70185)

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 25 23:29:48 PDT 2023


https://github.com/aemerson updated https://github.com/llvm/llvm-project/pull/70185

>From 1ae07db4328bfd5ca8e46c577a5de3c5e9b6ff3e Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Sun, 15 Oct 2023 15:29:14 -0700
Subject: [PATCH 1/3] [AArch64][GlobalISel] Add support for pre-indexed
 loads/stores.

The pre-index matcher just needs some small heuristics to make sure it doesn't
cause regressions. Apart from that it's a simple change, since the only
difference is an immediate operand of '1' vs '0' in the instruction.
---
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  24 +-
 llvm/lib/Target/AArch64/AArch64Combine.td     |   2 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |   4 -
 .../GISel/AArch64InstructionSelector.cpp      |  83 ++--
 .../legalize-indexed-load-stores.mir          |  22 +
 .../AArch64/GlobalISel/store-merging.ll       |   4 +-
 .../CodeGen/AArch64/arm64-indexed-memory.ll   | 303 +++----------
 .../AArch64/arm64-indexed-vector-ldst.ll      | 428 ++++++------------
 8 files changed, 289 insertions(+), 581 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 1cccddfd972221c..a404bd0da19e172 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1141,11 +1141,29 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
       return false;
   }
 
+  // Avoid increasing cross-block register pressure.
+  for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
+    if (AddrUse.getParent() != LdSt.getParent())
+      return false;
+  }
+
   // FIXME: check whether all uses of the base pointer are constant PtrAdds.
   // That might allow us to end base's liveness here by adjusting the constant.
-
-  return all_of(MRI.use_nodbg_instructions(Addr),
-                [&](MachineInstr &UseMI) { return dominates(LdSt, UseMI); });
+  bool RealUse = false;
+  for (auto &PtrUse : MRI.use_nodbg_instructions(Addr)) {
+    if (!dominates(LdSt, PtrUse))
+      return false; // All uses must be dominated by the load/store.
+
+    // If Ptr may be folded into the addressing mode of another use, then
+    // it's not profitable to do this transformation.
+    if (auto *UseLdSt = dyn_cast<GLoadStore>(&PtrUse)) {
+      if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
+        RealUse = true;
+    } else {
+      RealUse = true;
+    }
+  }
+  return RealUse;
 }
 
 bool CombinerHelper::matchCombineIndexedLoadStore(
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 017c4523c23a184..9ae1dd99f20f45d 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -245,7 +245,7 @@ def AArch64PostLegalizerLowering
 // Post-legalization combines which are primarily optimizations.
 def AArch64PostLegalizerCombiner
     : GICombiner<"AArch64PostLegalizerCombinerImpl",
-                       [copy_prop, combines_for_extload,
+                       [copy_prop, combines_for_extload, reassocs,
                         combine_indexed_load_store,
                         sext_trunc_sextload, mutate_anyext_to_zext,
                         hoist_logic_op_with_same_opcode_hands,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7211607fee528a6..25919816747df4c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -23699,10 +23699,6 @@ bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
 bool AArch64TargetLowering::isIndexingLegal(MachineInstr &MI, Register Base,
                                             Register Offset, bool IsPre,
                                             MachineRegisterInfo &MRI) const {
-  // HACK
-  if (IsPre)
-    return false; // Until we implement.
-
   auto CstOffset = getIConstantVRegVal(Offset, MRI);
   if (!CstOffset || CstOffset->isZero())
     return false;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 941607dae29bb90..942edbd78b6452b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5659,24 +5659,34 @@ bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
   Register WriteBack = Ld.getWritebackReg();
   Register Base = Ld.getBaseReg();
   Register Offset = Ld.getOffsetReg();
-
-  if (Ld.isPre())
-    return false; // TODO: add pre-inc support
-
-  unsigned Opc = 0;
-  static constexpr unsigned GPROpcodes[] = {
-      AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
-      AArch64::LDRXpost};
-  static constexpr unsigned FPROpcodes[] = {
-      AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
-      AArch64::LDRDpost, AArch64::LDRQpost};
-
+  LLT Ty = MRI.getType(Dst);
+  assert(Ty.getSizeInBits() <= 128);
   unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
-  if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
-    Opc = FPROpcodes[Log2_32(MemSize)];
-  else
-    Opc = GPROpcodes[Log2_32(MemSize)];
 
+  unsigned Opc = 0;
+  if (Ld.isPre()) {
+    static constexpr unsigned GPROpcodes[] = {
+        AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
+        AArch64::LDRXpre};
+    static constexpr unsigned FPROpcodes[] = {
+        AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
+        AArch64::LDRQpre};
+    if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+      Opc = FPROpcodes[Log2_32(MemSize)];
+    else
+      Opc = GPROpcodes[Log2_32(MemSize)];
+  } else {
+    static constexpr unsigned GPROpcodes[] = {
+        AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
+        AArch64::LDRXpost};
+    static constexpr unsigned FPROpcodes[] = {
+        AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
+        AArch64::LDRDpost, AArch64::LDRQpost};
+    if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+      Opc = FPROpcodes[Log2_32(MemSize)];
+    else
+      Opc = GPROpcodes[Log2_32(MemSize)];
+  }
   auto Cst = getIConstantVRegVal(Offset, MRI);
   if (!Cst)
     return false; // Shouldn't happen, but just in case.
@@ -5695,23 +5705,34 @@ bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
   Register Base = I.getBaseReg();
   Register Offset = I.getOffsetReg();
   LLT ValTy = MRI.getType(Val);
-
-  if (I.isPre())
-    return false; // TODO: add pre-inc support
+  assert(ValTy.getSizeInBits() <= 128);
 
   unsigned Opc = 0;
-  static constexpr unsigned GPROpcodes[] = {
-      AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
-      AArch64::STRXpost};
-  static constexpr unsigned FPROpcodes[] = {
-      AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
-      AArch64::STRDpost, AArch64::STRQpost};
-
-  assert(ValTy.getSizeInBits() <= 128);
-  if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
-    Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
-  else
-    Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+  if (I.isPre()) {
+    static constexpr unsigned GPROpcodes[] = {
+        AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
+        AArch64::STRXpre};
+    static constexpr unsigned FPROpcodes[] = {
+        AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
+        AArch64::STRQpre};
+
+    if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+      Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+    else
+      Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+  } else {
+    static constexpr unsigned GPROpcodes[] = {
+        AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
+        AArch64::STRXpost};
+    static constexpr unsigned FPROpcodes[] = {
+        AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
+        AArch64::STRDpost, AArch64::STRQpost};
+
+    if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
+      Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+    else
+      Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
+  }
 
   auto Cst = getIConstantVRegVal(Offset, MRI);
   if (!Cst)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir
index e82a0c219068fde..bd0317ec6a1360c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-indexed-load-stores.mir
@@ -87,3 +87,25 @@ body: |
     $q0 = COPY %dst
     RET_ReallyLR implicit $x0, implicit $q0
 ...
+---
+name:            pre_store_s64
+body: |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: pre_store_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %ptr:_(p0) = COPY $x0
+    ; CHECK-NEXT: %val:_(s64) = COPY $x1
+    ; CHECK-NEXT: %offset:_(s64) = G_CONSTANT i64 8
+    ; CHECK-NEXT: %writeback:_(p0) = G_INDEXED_STORE %val(s64), %ptr, %offset(s64), 1 :: (store (s64))
+    ; CHECK-NEXT: $x0 = COPY %writeback(p0)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %ptr:_(p0) = COPY $x0
+    %val:_(s64) = COPY $x1
+    %offset:_(s64) = G_CONSTANT i64 8
+    %writeback:_(p0) = G_INDEXED_STORE %val, %ptr, %offset, 1 :: (store (s64), align 8)
+    $x0 = COPY %writeback
+    RET_ReallyLR implicit $x0
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
index 23886d8bc4a7baa..07744dada4f1faa 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
@@ -83,8 +83,8 @@ define void @test_simple_vector(ptr %ptr) {
 ; CHECK-NEXT:    mov w8, #5 ; =0x5
 ; CHECK-NEXT:    strh w9, [x0, #2]
 ; CHECK-NEXT:    mov w9, #8 ; =0x8
-; CHECK-NEXT:    strh w8, [x0, #4]
-; CHECK-NEXT:    strh w9, [x0, #6]
+; CHECK-NEXT:    strh w8, [x0, #4]!
+; CHECK-NEXT:    strh w9, [x0, #2]
 ; CHECK-NEXT:    ret
   store <2 x i16> <i16 4, i16 7>, ptr %ptr
   %addr2 = getelementptr <2 x i16>, ptr %ptr, i64 1
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index 050c7e30ad2f934..c24192c6da59a40 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -197,88 +197,40 @@ define ptr @storef64(ptr %ptr, double %index, double %spacing) {
 
 
 define ptr @pref64(ptr %ptr, double %spacing) {
-; CHECK64-LABEL: pref64:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    str d0, [x0, #32]!
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: pref64:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x8, x0
-; GISEL-NEXT:    add x0, x0, #32
-; GISEL-NEXT:    str d0, [x8, #32]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: pref64:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    str d0, [x0, #32]!
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: pref64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str d0, [x0, #32]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds double, ptr %ptr, i64 4
   store double %spacing, ptr %incdec.ptr, align 4
   ret ptr %incdec.ptr
 }
 
 define ptr @pref32(ptr %ptr, float %spacing) {
-; CHECK64-LABEL: pref32:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    str s0, [x0, #12]!
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: pref32:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x8, x0
-; GISEL-NEXT:    add x0, x0, #12
-; GISEL-NEXT:    str s0, [x8, #12]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: pref32:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    str s0, [x0, #12]!
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: pref32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str s0, [x0, #12]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds float, ptr %ptr, i64 3
   store float %spacing, ptr %incdec.ptr, align 4
   ret ptr %incdec.ptr
 }
 
 define ptr @pref16(ptr %ptr, half %spacing) nounwind {
-; CHECK64-LABEL: pref16:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    str h0, [x0, #6]!
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: pref16:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x8, x0
-; GISEL-NEXT:    add x0, x0, #6
-; GISEL-NEXT:    str h0, [x8, #6]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: pref16:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    str h0, [x0, #6]!
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: pref16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str h0, [x0, #6]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds half, ptr %ptr, i64 3
   store half %spacing, ptr %incdec.ptr, align 2
   ret ptr %incdec.ptr
 }
 
 define ptr @pre64(ptr %ptr, i64 %spacing) {
-; CHECK64-LABEL: pre64:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    str x1, [x0, #16]!
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: pre64:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x8, x0
-; GISEL-NEXT:    add x0, x0, #16
-; GISEL-NEXT:    str x1, [x8, #16]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: pre64:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    str x1, [x0, #16]!
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: pre64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str x1, [x0, #16]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i64, ptr %ptr, i64 2
   store i64 %spacing, ptr %incdec.ptr, align 4
   ret ptr %incdec.ptr
@@ -297,44 +249,20 @@ define ptr @pre64idxpos256(ptr %ptr, i64 %spacing) {
 }
 
 define ptr @pre64idxneg256(ptr %ptr, i64 %spacing) {
-; CHECK64-LABEL: pre64idxneg256:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    str x1, [x0, #-256]!
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: pre64idxneg256:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x8, x0
-; GISEL-NEXT:    sub x0, x0, #256
-; GISEL-NEXT:    stur x1, [x8, #-256]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: pre64idxneg256:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    str x1, [x0, #-256]!
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: pre64idxneg256:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str x1, [x0, #-256]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i64, ptr %ptr, i64 -32
   store i64 %spacing, ptr %incdec.ptr, align 4
   ret ptr %incdec.ptr
 }
 
 define ptr @pre32(ptr %ptr, i32 %spacing) {
-; CHECK64-LABEL: pre32:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    str w1, [x0, #8]!
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: pre32:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x8, x0
-; GISEL-NEXT:    add x0, x0, #8
-; GISEL-NEXT:    str w1, [x8, #8]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: pre32:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    str w1, [x0, #8]!
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: pre32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str w1, [x0, #8]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 2
   store i32 %spacing, ptr %incdec.ptr, align 4
   ret ptr %incdec.ptr
@@ -353,44 +281,20 @@ define ptr @pre32idxpos256(ptr %ptr, i32 %spacing) {
 }
 
 define ptr @pre32idxneg256(ptr %ptr, i32 %spacing) {
-; CHECK64-LABEL: pre32idxneg256:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    str w1, [x0, #-256]!
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: pre32idxneg256:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x8, x0
-; GISEL-NEXT:    sub x0, x0, #256
-; GISEL-NEXT:    stur w1, [x8, #-256]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: pre32idxneg256:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    str w1, [x0, #-256]!
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: pre32idxneg256:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str w1, [x0, #-256]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 -64
   store i32 %spacing, ptr %incdec.ptr, align 4
   ret ptr %incdec.ptr
 }
 
 define ptr @pre16(ptr %ptr, i16 %spacing) {
-; CHECK64-LABEL: pre16:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    strh w1, [x0, #4]!
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: pre16:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x8, x0
-; GISEL-NEXT:    add x0, x0, #4
-; GISEL-NEXT:    strh w1, [x8, #4]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: pre16:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    strh w1, [x0, #4]!
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: pre16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strh w1, [x0, #4]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 2
   store i16 %spacing, ptr %incdec.ptr, align 4
   ret ptr %incdec.ptr
@@ -409,44 +313,20 @@ define ptr @pre16idxpos256(ptr %ptr, i16 %spacing) {
 }
 
 define ptr @pre16idxneg256(ptr %ptr, i16 %spacing) {
-; CHECK64-LABEL: pre16idxneg256:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    strh w1, [x0, #-256]!
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: pre16idxneg256:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x8, x0
-; GISEL-NEXT:    sub x0, x0, #256
-; GISEL-NEXT:    sturh w1, [x8, #-256]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: pre16idxneg256:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    strh w1, [x0, #-256]!
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: pre16idxneg256:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strh w1, [x0, #-256]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 -128
   store i16 %spacing, ptr %incdec.ptr, align 4
   ret ptr %incdec.ptr
 }
 
 define ptr @pre8(ptr %ptr, i8 %spacing) {
-; CHECK64-LABEL: pre8:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    strb w1, [x0, #2]!
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: pre8:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x8, x0
-; GISEL-NEXT:    add x0, x0, #2
-; GISEL-NEXT:    strb w1, [x8, #2]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: pre8:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    strb w1, [x0, #2]!
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: pre8:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strb w1, [x0, #2]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 2
   store i8 %spacing, ptr %incdec.ptr, align 4
   ret ptr %incdec.ptr
@@ -465,44 +345,20 @@ define ptr @pre8idxpos256(ptr %ptr, i8 %spacing) {
 }
 
 define ptr @pre8idxneg256(ptr %ptr, i8 %spacing) {
-; CHECK64-LABEL: pre8idxneg256:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    strb w1, [x0, #-256]!
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: pre8idxneg256:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x8, x0
-; GISEL-NEXT:    sub x0, x0, #256
-; GISEL-NEXT:    sturb w1, [x8, #-256]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: pre8idxneg256:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    strb w1, [x0, #-256]!
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: pre8idxneg256:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strb w1, [x0, #-256]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 -256
   store i8 %spacing, ptr %incdec.ptr, align 4
   ret ptr %incdec.ptr
 }
 
 define ptr @pretrunc64to32(ptr %ptr, i64 %spacing) {
-; CHECK64-LABEL: pretrunc64to32:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    str w1, [x0, #8]!
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: pretrunc64to32:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x8, x0
-; GISEL-NEXT:    add x0, x0, #8
-; GISEL-NEXT:    str w1, [x8, #8]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: pretrunc64to32:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    str w1, [x0, #8]!
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: pretrunc64to32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str w1, [x0, #8]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i32, ptr %ptr, i64 2
   %trunc = trunc i64 %spacing to i32
   store i32 %trunc, ptr %incdec.ptr, align 4
@@ -510,22 +366,10 @@ define ptr @pretrunc64to32(ptr %ptr, i64 %spacing) {
 }
 
 define ptr @pretrunc64to16(ptr %ptr, i64 %spacing) {
-; CHECK64-LABEL: pretrunc64to16:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    strh w1, [x0, #4]!
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: pretrunc64to16:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x8, x0
-; GISEL-NEXT:    add x0, x0, #4
-; GISEL-NEXT:    strh w1, [x8, #4]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: pretrunc64to16:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    strh w1, [x0, #4]!
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: pretrunc64to16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strh w1, [x0, #4]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i16, ptr %ptr, i64 2
   %trunc = trunc i64 %spacing to i16
   store i16 %trunc, ptr %incdec.ptr, align 4
@@ -533,22 +377,10 @@ define ptr @pretrunc64to16(ptr %ptr, i64 %spacing) {
 }
 
 define ptr @pretrunc64to8(ptr %ptr, i64 %spacing) {
-; CHECK64-LABEL: pretrunc64to8:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    strb w1, [x0, #2]!
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: pretrunc64to8:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x8, x0
-; GISEL-NEXT:    add x0, x0, #2
-; GISEL-NEXT:    strb w1, [x8, #2]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: pretrunc64to8:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    strb w1, [x0, #2]!
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: pretrunc64to8:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    strb w1, [x0, #2]!
+; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i8, ptr %ptr, i64 2
   %trunc = trunc i64 %spacing to i8
   store i8 %trunc, ptr %incdec.ptr, align 4
@@ -583,24 +415,11 @@ define ptr @preidxf32(ptr %src, ptr %out) {
 }
 
 define ptr @preidxf16(ptr %src, ptr %out) {
-; CHECK64-LABEL: preidxf16:
-; CHECK64:       ; %bb.0:
-; CHECK64-NEXT:    ldr h0, [x0, #2]!
-; CHECK64-NEXT:    str h0, [x1]
-; CHECK64-NEXT:    ret
-;
-; GISEL-LABEL: preidxf16:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    ldr h0, [x0, #2]
-; GISEL-NEXT:    add x0, x0, #2
-; GISEL-NEXT:    str h0, [x1]
-; GISEL-NEXT:    ret
-;
-; CHECK32-LABEL: preidxf16:
-; CHECK32:       ; %bb.0:
-; CHECK32-NEXT:    ldr h0, [x0, #2]!
-; CHECK32-NEXT:    str h0, [x1]
-; CHECK32-NEXT:    ret
+; CHECK-LABEL: preidxf16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr h0, [x0, #2]!
+; CHECK-NEXT:    str h0, [x1]
+; CHECK-NEXT:    ret
   %ptr = getelementptr inbounds half, ptr %src, i64 1
   %tmp = load half, ptr %ptr, align 2
   store half %tmp, ptr %out, align 2
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 46563f6a8e089c4..0d7620d1c883d68 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -626,20 +626,12 @@
 @ptr = global ptr null
 
 define <8 x i8> @test_v8i8_pre_load(ptr %addr) {
-; SDAG-LABEL: test_v8i8_pre_load:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    ldr d0, [x0, #40]!
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v8i8_pre_load:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr d0, [x0, #40]
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #40
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v8i8_pre_load:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, #40]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <8 x i8>, ptr %addr, i32 5
   %val = load <8 x i8>, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -660,20 +652,12 @@ define <8 x i8> @test_v8i8_post_load(ptr %addr) {
 }
 
 define void @test_v8i8_pre_store(<8 x i8> %in, ptr %addr) {
-; SDAG-LABEL: test_v8i8_pre_store:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str d0, [x0, #40]!
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v8i8_pre_store:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #40
-; CHECK-GISEL-NEXT:    str d0, [x0, #40]
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v8i8_pre_store:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0, #40]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <8 x i8>, ptr %addr, i32 5
   store <8 x i8> %in, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -694,20 +678,12 @@ define void @test_v8i8_post_store(<8 x i8> %in, ptr %addr) {
 }
 
 define <4 x i16> @test_v4i16_pre_load(ptr %addr) {
-; SDAG-LABEL: test_v4i16_pre_load:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    ldr d0, [x0, #40]!
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v4i16_pre_load:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr d0, [x0, #40]
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #40
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v4i16_pre_load:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, #40]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x i16>, ptr %addr, i32 5
   %val = load <4 x i16>, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -728,20 +704,12 @@ define <4 x i16> @test_v4i16_post_load(ptr %addr) {
 }
 
 define void @test_v4i16_pre_store(<4 x i16> %in, ptr %addr) {
-; SDAG-LABEL: test_v4i16_pre_store:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str d0, [x0, #40]!
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v4i16_pre_store:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #40
-; CHECK-GISEL-NEXT:    str d0, [x0, #40]
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v4i16_pre_store:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0, #40]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x i16>, ptr %addr, i32 5
   store <4 x i16> %in, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -762,20 +730,12 @@ define void @test_v4i16_post_store(<4 x i16> %in, ptr %addr) {
 }
 
 define <2 x i32> @test_v2i32_pre_load(ptr %addr) {
-; SDAG-LABEL: test_v2i32_pre_load:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    ldr d0, [x0, #40]!
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v2i32_pre_load:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr d0, [x0, #40]
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #40
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v2i32_pre_load:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, #40]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x i32>, ptr %addr, i32 5
   %val = load <2 x i32>, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -796,20 +756,12 @@ define <2 x i32> @test_v2i32_post_load(ptr %addr) {
 }
 
 define void @test_v2i32_pre_store(<2 x i32> %in, ptr %addr) {
-; SDAG-LABEL: test_v2i32_pre_store:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str d0, [x0, #40]!
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v2i32_pre_store:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #40
-; CHECK-GISEL-NEXT:    str d0, [x0, #40]
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v2i32_pre_store:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0, #40]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x i32>, ptr %addr, i32 5
   store <2 x i32> %in, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -830,20 +782,12 @@ define void @test_v2i32_post_store(<2 x i32> %in, ptr %addr) {
 }
 
 define <2 x float> @test_v2f32_pre_load(ptr %addr) {
-; SDAG-LABEL: test_v2f32_pre_load:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    ldr d0, [x0, #40]!
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v2f32_pre_load:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr d0, [x0, #40]
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #40
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v2f32_pre_load:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0, #40]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x float>, ptr %addr, i32 5
   %val = load <2 x float>, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -864,20 +808,12 @@ define <2 x float> @test_v2f32_post_load(ptr %addr) {
 }
 
 define void @test_v2f32_pre_store(<2 x float> %in, ptr %addr) {
-; SDAG-LABEL: test_v2f32_pre_store:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str d0, [x0, #40]!
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v2f32_pre_store:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #40
-; CHECK-GISEL-NEXT:    str d0, [x0, #40]
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v2f32_pre_store:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0, #40]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x float>, ptr %addr, i32 5
   store <2 x float> %in, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -907,10 +843,10 @@ define <1 x i64> @test_v1i64_pre_load(ptr %addr) {
 ;
 ; CHECK-GISEL-LABEL: test_v1i64_pre_load:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr d0, [x0, #40]
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #40
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
+; CHECK-GISEL-NEXT:    ldr x8, [x0, #40]!
+; CHECK-GISEL-NEXT:    adrp x9, _ptr@PAGE
+; CHECK-GISEL-NEXT:    str x0, [x9, _ptr@PAGEOFF]
+; CHECK-GISEL-NEXT:    fmov d0, x8
 ; CHECK-GISEL-NEXT:    ret
   %newaddr = getelementptr <1 x i64>, ptr %addr, i32 5
   %val = load <1 x i64>, ptr %newaddr, align 8
@@ -940,20 +876,12 @@ define <1 x i64> @test_v1i64_post_load(ptr %addr) {
 }
 
 define void @test_v1i64_pre_store(<1 x i64> %in, ptr %addr) {
-; SDAG-LABEL: test_v1i64_pre_store:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str d0, [x0, #40]!
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v1i64_pre_store:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #40
-; CHECK-GISEL-NEXT:    str d0, [x0, #40]
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v1i64_pre_store:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str d0, [x0, #40]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <1 x i64>, ptr %addr, i32 5
   store <1 x i64> %in, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -974,20 +902,12 @@ define void @test_v1i64_post_store(<1 x i64> %in, ptr %addr) {
 }
 
 define <16 x i8> @test_v16i8_pre_load(ptr %addr) {
-; SDAG-LABEL: test_v16i8_pre_load:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    ldr q0, [x0, #80]!
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v16i8_pre_load:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr q0, [x0, #80]
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #80
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v16i8_pre_load:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0, #80]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <16 x i8>, ptr %addr, i32 5
   %val = load <16 x i8>, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -1008,20 +928,12 @@ define <16 x i8> @test_v16i8_post_load(ptr %addr) {
 }
 
 define void @test_v16i8_pre_store(<16 x i8> %in, ptr %addr) {
-; SDAG-LABEL: test_v16i8_pre_store:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str q0, [x0, #80]!
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v16i8_pre_store:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #80
-; CHECK-GISEL-NEXT:    str q0, [x0, #80]
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v16i8_pre_store:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0, #80]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <16 x i8>, ptr %addr, i32 5
   store <16 x i8> %in, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -1042,20 +954,12 @@ define void @test_v16i8_post_store(<16 x i8> %in, ptr %addr) {
 }
 
 define <8 x i16> @test_v8i16_pre_load(ptr %addr) {
-; SDAG-LABEL: test_v8i16_pre_load:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    ldr q0, [x0, #80]!
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v8i16_pre_load:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr q0, [x0, #80]
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #80
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v8i16_pre_load:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0, #80]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <8 x i16>, ptr %addr, i32 5
   %val = load <8 x i16>, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -1076,20 +980,12 @@ define <8 x i16> @test_v8i16_post_load(ptr %addr) {
 }
 
 define void @test_v8i16_pre_store(<8 x i16> %in, ptr %addr) {
-; SDAG-LABEL: test_v8i16_pre_store:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str q0, [x0, #80]!
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v8i16_pre_store:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #80
-; CHECK-GISEL-NEXT:    str q0, [x0, #80]
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v8i16_pre_store:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0, #80]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <8 x i16>, ptr %addr, i32 5
   store <8 x i16> %in, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -1110,20 +1006,12 @@ define void @test_v8i16_post_store(<8 x i16> %in, ptr %addr) {
 }
 
 define <4 x i32> @test_v4i32_pre_load(ptr %addr) {
-; SDAG-LABEL: test_v4i32_pre_load:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    ldr q0, [x0, #80]!
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v4i32_pre_load:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr q0, [x0, #80]
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #80
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v4i32_pre_load:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0, #80]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x i32>, ptr %addr, i32 5
   %val = load <4 x i32>, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -1144,20 +1032,12 @@ define <4 x i32> @test_v4i32_post_load(ptr %addr) {
 }
 
 define void @test_v4i32_pre_store(<4 x i32> %in, ptr %addr) {
-; SDAG-LABEL: test_v4i32_pre_store:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str q0, [x0, #80]!
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v4i32_pre_store:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #80
-; CHECK-GISEL-NEXT:    str q0, [x0, #80]
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v4i32_pre_store:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0, #80]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x i32>, ptr %addr, i32 5
   store <4 x i32> %in, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -1179,20 +1059,12 @@ define void @test_v4i32_post_store(<4 x i32> %in, ptr %addr) {
 
 
 define <4 x float> @test_v4f32_pre_load(ptr %addr) {
-; SDAG-LABEL: test_v4f32_pre_load:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    ldr q0, [x0, #80]!
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v4f32_pre_load:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr q0, [x0, #80]
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #80
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v4f32_pre_load:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0, #80]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x float>, ptr %addr, i32 5
   %val = load <4 x float>, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -1213,20 +1085,12 @@ define <4 x float> @test_v4f32_post_load(ptr %addr) {
 }
 
 define void @test_v4f32_pre_store(<4 x float> %in, ptr %addr) {
-; SDAG-LABEL: test_v4f32_pre_store:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str q0, [x0, #80]!
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v4f32_pre_store:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #80
-; CHECK-GISEL-NEXT:    str q0, [x0, #80]
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v4f32_pre_store:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0, #80]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <4 x float>, ptr %addr, i32 5
   store <4 x float> %in, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -1248,20 +1112,12 @@ define void @test_v4f32_post_store(<4 x float> %in, ptr %addr) {
 
 
 define <2 x i64> @test_v2i64_pre_load(ptr %addr) {
-; SDAG-LABEL: test_v2i64_pre_load:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    ldr q0, [x0, #80]!
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v2i64_pre_load:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr q0, [x0, #80]
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #80
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v2i64_pre_load:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0, #80]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x i64>, ptr %addr, i32 5
   %val = load <2 x i64>, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -1282,20 +1138,12 @@ define <2 x i64> @test_v2i64_post_load(ptr %addr) {
 }
 
 define void @test_v2i64_pre_store(<2 x i64> %in, ptr %addr) {
-; SDAG-LABEL: test_v2i64_pre_store:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str q0, [x0, #80]!
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v2i64_pre_store:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #80
-; CHECK-GISEL-NEXT:    str q0, [x0, #80]
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v2i64_pre_store:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0, #80]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x i64>, ptr %addr, i32 5
   store <2 x i64> %in, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -1317,20 +1165,12 @@ define void @test_v2i64_post_store(<2 x i64> %in, ptr %addr) {
 
 
 define <2 x double> @test_v2f64_pre_load(ptr %addr) {
-; SDAG-LABEL: test_v2f64_pre_load:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    ldr q0, [x0, #80]!
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v2f64_pre_load:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr q0, [x0, #80]
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #80
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v2f64_pre_load:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0, #80]!
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x double>, ptr %addr, i32 5
   %val = load <2 x double>, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr
@@ -1351,20 +1191,12 @@ define <2 x double> @test_v2f64_post_load(ptr %addr) {
 }
 
 define void @test_v2f64_pre_store(<2 x double> %in, ptr %addr) {
-; SDAG-LABEL: test_v2f64_pre_store:
-; SDAG:       ; %bb.0:
-; SDAG-NEXT:    adrp x8, _ptr@PAGE
-; SDAG-NEXT:    str q0, [x0, #80]!
-; SDAG-NEXT:    str x0, [x8, _ptr@PAGEOFF]
-; SDAG-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: test_v2f64_pre_store:
-; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    adrp x8, _ptr@PAGE
-; CHECK-GISEL-NEXT:    add x9, x0, #80
-; CHECK-GISEL-NEXT:    str q0, [x0, #80]
-; CHECK-GISEL-NEXT:    str x9, [x8, _ptr@PAGEOFF]
-; CHECK-GISEL-NEXT:    ret
+; CHECK-LABEL: test_v2f64_pre_store:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _ptr@PAGE
+; CHECK-NEXT:    str q0, [x0, #80]!
+; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
+; CHECK-NEXT:    ret
   %newaddr = getelementptr <2 x double>, ptr %addr, i32 5
   store <2 x double> %in, ptr %newaddr, align 8
   store ptr %newaddr, ptr @ptr

>From 033bab5da72ccc643ea0766eb58b2c4811bb1c94 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Wed, 25 Oct 2023 02:40:56 -0700
Subject: [PATCH 2/3] Address comments

---
 llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp           | 9 ++++-----
 .../Target/AArch64/GISel/AArch64InstructionSelector.cpp  | 4 ++--
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index a404bd0da19e172..3c2b5f490ccb871 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1142,21 +1142,20 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
   }
 
   // Avoid increasing cross-block register pressure.
-  for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
+  for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
     if (AddrUse.getParent() != LdSt.getParent())
       return false;
-  }
 
   // FIXME: check whether all uses of the base pointer are constant PtrAdds.
   // That might allow us to end base's liveness here by adjusting the constant.
   bool RealUse = false;
-  for (auto &PtrUse : MRI.use_nodbg_instructions(Addr)) {
-    if (!dominates(LdSt, PtrUse))
+  for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
+    if (!dominates(LdSt, AddrUse))
       return false; // All use must be dominated by the load/store.
 
     // If Ptr may be folded in addressing mode of other use, then it's
     // not profitable to do this transformation.
-    if (auto *UseLdSt = dyn_cast<GLoadStore>(&PtrUse)) {
+    if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
       if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
         RealUse = true;
     } else {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 942edbd78b6452b..9c5b34166ffaf81 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5660,7 +5660,7 @@ bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
   Register Base = Ld.getBaseReg();
   Register Offset = Ld.getOffsetReg();
   LLT Ty = MRI.getType(Dst);
-  assert(Ty.getSizeInBits() <= 128);
+  assert(Ty.getSizeInBits() <= 128 && "Unexpected type for indexed load");
   unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
 
   unsigned Opc = 0;
@@ -5705,7 +5705,7 @@ bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
   Register Base = I.getBaseReg();
   Register Offset = I.getOffsetReg();
   LLT ValTy = MRI.getType(Val);
-  assert(ValTy.getSizeInBits() <= 128);
+  assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
 
   unsigned Opc = 0;
   if (I.isPre()) {

>From bca2d67d5bdf6f3af0815c976ac6c24247c50b2c Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Wed, 25 Oct 2023 23:29:21 -0700
Subject: [PATCH 3/3] Remove unnecessary change to AArch64Combine.td

---
 llvm/lib/Target/AArch64/AArch64Combine.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 9ae1dd99f20f45d..017c4523c23a184 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -245,7 +245,7 @@ def AArch64PostLegalizerLowering
 // Post-legalization combines which are primarily optimizations.
 def AArch64PostLegalizerCombiner
     : GICombiner<"AArch64PostLegalizerCombinerImpl",
-                       [copy_prop, combines_for_extload, reassocs,
+                       [copy_prop, combines_for_extload,
                         combine_indexed_load_store,
                         sext_trunc_sextload, mutate_anyext_to_zext,
                         hoist_logic_op_with_same_opcode_hands,



More information about the llvm-commits mailing list