[llvm] d58bd21 - [GlobalISel] Look between instructions to be matched (#101675)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 27 08:56:44 PDT 2024


Author: chuongg3
Date: 2024-08-27T16:56:40+01:00
New Revision: d58bd21150914b4a2304c24b04a714bfb251c594

URL: https://github.com/llvm/llvm-project/commit/d58bd21150914b4a2304c24b04a714bfb251c594
DIFF: https://github.com/llvm/llvm-project/commit/d58bd21150914b4a2304c24b04a714bfb251c594.diff

LOG: [GlobalISel] Look between instructions to be matched (#101675)

When a pattern is matched in TableGen, a check is run called
isObviouslySafeToFold(). One of the condition that it checks for is
whether the instructions that are being matched are consecutive, so the
instruction's insertion point does not change.

This patch allows the movement of the insertion point of a load
instruction if none of the intervening instructions are stores or have
side-effects.

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
    llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
    llvm/test/CodeGen/AArch64/arm64-ld1.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
index 26752369a7711a..1a887ab5794b06 100644
--- a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
@@ -61,15 +61,42 @@ bool GIMatchTableExecutor::isBaseWithConstantOffset(
 
 bool GIMatchTableExecutor::isObviouslySafeToFold(MachineInstr &MI,
                                                  MachineInstr &IntoMI) const {
+  auto IntoMIIter = IntoMI.getIterator();
+
   // Immediate neighbours are already folded.
   if (MI.getParent() == IntoMI.getParent() &&
-      std::next(MI.getIterator()) == IntoMI.getIterator())
+      std::next(MI.getIterator()) == IntoMIIter)
     return true;
 
   // Convergent instructions cannot be moved in the CFG.
   if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
     return false;
 
-  return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
-         !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
+  if (MI.isLoadFoldBarrier())
+    return false;
+
+  // If the load is simple, check instructions between MI and IntoMI
+  if (MI.mayLoad() && MI.getParent() == IntoMI.getParent()) {
+    if (MI.memoperands_empty())
+      return false;
+    auto &MMO = **(MI.memoperands_begin());
+    if (MMO.isAtomic() || MMO.isVolatile())
+      return false;
+
+    // Ensure instructions between MI and IntoMI are not affected when combined
+    unsigned Iter = 0;
+    const unsigned MaxIter = 20;
+    for (auto &CurrMI :
+         instructionsWithoutDebug(MI.getIterator(), IntoMI.getIterator())) {
+      if (CurrMI.isLoadFoldBarrier())
+        return false;
+
+      if (Iter++ == MaxIter)
+        return false;
+    }
+
+    return true;
+  }
+
+  return true;
 }

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
index 3a46b2a943288b..20e1a93fe2e639 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
@@ -41,8 +41,10 @@
   define void @anyext_on_fpr() { ret void }
   define void @anyext_on_fpr8() { ret void }
 
-...
+  define void @load_s32_gpr_LD1() { ret void }
+  define void @load_s32_gpr_GIM() { ret void }
 
+...
 ---
 name:            load_s64_gpr
 legalized:       true
@@ -57,7 +59,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s64_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 0 :: (load (s64) from %ir.addr)
     ; CHECK-NEXT: $x0 = COPY [[LDRXui]]
     %0(p0) = COPY $x0
@@ -79,7 +83,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s32_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from %ir.addr)
     ; CHECK-NEXT: $w0 = COPY [[LDRWui]]
     %0(p0) = COPY $x0
@@ -97,7 +103,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s16_gpr_anyext
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr)
     ; CHECK-NEXT: $w0 = COPY [[LDRHHui]]
     %0:gpr(p0) = COPY $x0
@@ -119,7 +127,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s16_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr)
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
     ; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -139,7 +149,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s8_gpr_anyext
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8) from %ir.addr)
     ; CHECK-NEXT: $w0 = COPY [[LDRBBui]]
     %0:gpr(p0) = COPY $x0
@@ -161,7 +173,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s8_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8) from %ir.addr)
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
     ; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -188,7 +202,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_fi_s64_gpr
-    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.0.ptr0, 0 :: (load (s64))
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.0.ptr0, 0 :: (load (s64))
     ; CHECK-NEXT: $x0 = COPY [[LDRXui]]
     %0(p0) = G_FRAME_INDEX %stack.0.ptr0
     %1(s64) = G_LOAD %0 :: (load (s64))
@@ -211,7 +227,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_128_s64_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 16 :: (load (s64) from %ir.addr)
     ; CHECK-NEXT: $x0 = COPY [[LDRXui]]
     %0(p0) = COPY $x0
@@ -237,7 +255,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_512_s32_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 128 :: (load (s32) from %ir.addr)
     ; CHECK-NEXT: $w0 = COPY [[LDRWui]]
     %0(p0) = COPY $x0
@@ -263,7 +283,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_64_s16_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 32 :: (load (s16) from %ir.addr)
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
     ; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -291,7 +313,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_1_s8_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 1 :: (load (s8) from %ir.addr)
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
     ; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -317,7 +341,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s64_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load (s64) from %ir.addr)
     ; CHECK-NEXT: $d0 = COPY [[LDRDui]]
     %0(p0) = COPY $x0
@@ -339,7 +365,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s32_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 0 :: (load (s32) from %ir.addr)
     ; CHECK-NEXT: $s0 = COPY [[LDRSui]]
     %0(p0) = COPY $x0
@@ -361,7 +389,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s16_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0 :: (load (s16) from %ir.addr)
     ; CHECK-NEXT: $h0 = COPY [[LDRHui]]
     %0(p0) = COPY $x0
@@ -383,7 +413,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s8_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8) from %ir.addr)
     ; CHECK-NEXT: $b0 = COPY [[LDRBui]]
     %0(p0) = COPY $x0
@@ -407,7 +439,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_8_s64_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 1 :: (load (s64) from %ir.addr)
     ; CHECK-NEXT: $d0 = COPY [[LDRDui]]
     %0(p0) = COPY $x0
@@ -433,7 +467,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_16_s32_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 4 :: (load (s32) from %ir.addr)
     ; CHECK-NEXT: $s0 = COPY [[LDRSui]]
     %0(p0) = COPY $x0
@@ -459,7 +495,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_64_s16_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 32 :: (load (s16) from %ir.addr)
     ; CHECK-NEXT: $h0 = COPY [[LDRHui]]
     %0(p0) = COPY $x0
@@ -485,7 +523,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_32_s8_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 32 :: (load (s8) from %ir.addr)
     ; CHECK-NEXT: $b0 = COPY [[LDRBui]]
     %0(p0) = COPY $x0
@@ -508,7 +548,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_v2s32
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load (<2 x s32>) from %ir.addr)
     ; CHECK-NEXT: $d0 = COPY [[LDRDui]]
     %0(p0) = COPY $x0
@@ -529,7 +571,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_v2s64
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (<2 x s64>) from %ir.addr)
     ; CHECK-NEXT: $q0 = COPY [[LDRQui]]
     %0(p0) = COPY $x0
@@ -712,3 +756,63 @@ body:             |
     RET_ReallyLR
 
 ...
+---
+name:            load_s32_gpr_LD1
+legalized:       true
+regBankSelected: true
+
+body:             |
+  bb.0:
+    liveins: $q0, $x0
+
+    ; CHECK-LABEL: name: load_s32_gpr_LD1
+    ; CHECK: liveins: $q0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: [[LD1i32_:%[0-9]+]]:fpr128 = LD1i32 [[COPY]], 0, [[COPY1]] :: (load (s32))
+    ; CHECK-NEXT: $q0 = COPY [[LD1i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:fpr(<4 x s32>) = COPY $q0
+    %1:gpr(p0) = COPY $x0
+    %2:fpr(s32) = G_LOAD %1(p0) :: (load (s32))
+    %3:gpr(s32) = G_CONSTANT i32 3
+    %5:gpr(s64) = G_CONSTANT i64 0
+    %4:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %2(s32), %5(s64)
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+
+name:            load_s32_gpr_GIM
+legalized:       true
+regBankSelected: true
+
+body:             |
+  bb.0:
+    liveins: $q0, $x0
+    ;This test should not select an LD1 instruction as there is a store instruction between G_INSERT_VECTOR_ELT and G_LOAD 
+    ; CHECK-LABEL: name: load_s32_gpr_GIM
+    ; CHECK: liveins: $q0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY1]], 0 :: (load (s32))
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 3
+    ; CHECK-NEXT: STRWui [[MOVi32imm]], [[COPY1]], 0 :: (store (s32))
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[LDRSui]], %subreg.ssub
+    ; CHECK-NEXT: [[INSvi32lane:%[0-9]+]]:fpr128 = INSvi32lane [[COPY]], 0, [[INSERT_SUBREG]], 0
+    ; CHECK-NEXT: $q0 = COPY [[INSvi32lane]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:fpr(<4 x s32>) = COPY $q0
+    %1:gpr(p0) = COPY $x0
+    %2:fpr(s32) = G_LOAD %1(p0) :: (load (s32))
+    %3:gpr(s32) = G_CONSTANT i32 3
+    G_STORE %3(s32), %1(p0) :: (store (s32))
+    %5:gpr(s64) = G_CONSTANT i64 0
+    %4:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %2(s32), %5(s64)
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...

diff  --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 628fb550a0532b..720951eca6a344 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -13417,9 +13417,8 @@ define <8 x i16> @test_v8i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr h1, [x0]
+; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT:    mov.h v0[1], v1[0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i16, ptr %bar
@@ -13465,12 +13464,11 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr h1, [x0]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT:    mov.h v0[1], v1[0]
-; CHECK-GI-NEXT:    str x8, [x1]
+; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i16, ptr %bar
   %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
@@ -13509,9 +13507,8 @@ define <4 x i32> @test_v4i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
+; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i32, ptr %bar
@@ -13557,12 +13554,11 @@ define <2 x i32> @test_v2i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
-; CHECK-GI-NEXT:    str x8, [x1]
+; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i32, ptr %bar
   %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
@@ -13601,9 +13597,8 @@ define <2 x i64> @test_v2i64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr d1, [x0]
+; CHECK-GI-NEXT:    ld1.d { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i64, ptr %bar
@@ -13643,9 +13638,8 @@ define <4 x float> @test_v4f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
+; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load float, ptr %bar
@@ -13691,12 +13685,11 @@ define <2 x float> @test_v2f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
-; CHECK-GI-NEXT:    str x8, [x1]
+; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load float, ptr %bar
   %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
@@ -13735,9 +13728,8 @@ define <2 x double> @test_v2f64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr d1, [x0]
+; CHECK-GI-NEXT:    ld1.d { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load double, ptr %bar
@@ -13792,15 +13784,14 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr,
 ; CHECK-GI-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ldr h1, [x0]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
-; CHECK-GI-NEXT:    mov.h v0[1], v1[0]
-; CHECK-GI-NEXT:    ldr d2, [x3]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    cnt.8b v2, v2
-; CHECK-GI-NEXT:    uaddlp.4h v2, v2
-; CHECK-GI-NEXT:    uaddlp.2s v1, v2
+; CHECK-GI-NEXT:    ldr d1, [x3]
+; CHECK-GI-NEXT:    cnt.8b v1, v1
+; CHECK-GI-NEXT:    uaddlp.4h v1, v1
+; CHECK-GI-NEXT:    uaddlp.2s v1, v1
 ; CHECK-GI-NEXT:    str d1, [x3]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i16, ptr %bar
@@ -13989,24 +13980,14 @@ define void  @test_ld1lane_build_i8(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr
 }
 
 define <4 x i32> @test_inc_cycle(<4 x i32> %vec, ptr %in) {
-; CHECK-SD-LABEL: test_inc_cycle:
-; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT:    adrp x9, _var at PAGE
-; CHECK-SD-NEXT:    fmov x8, d0
-; CHECK-SD-NEXT:    add x8, x0, x8, lsl #2
-; CHECK-SD-NEXT:    str x8, [x9, _var at PAGEOFF]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_inc_cycle:
-; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
-; CHECK-GI-NEXT:    adrp x9, _var at PAGE
-; CHECK-GI-NEXT:    mov.s v0[0], v1[0]
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    add x8, x0, x8, lsl #2
-; CHECK-GI-NEXT:    str x8, [x9, _var at PAGEOFF]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_inc_cycle:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    adrp x9, _var at PAGE
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x8, x0, x8, lsl #2
+; CHECK-NEXT:    str x8, [x9, _var at PAGEOFF]
+; CHECK-NEXT:    ret
   %elt = load i32, ptr %in
   %newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0
 

diff  --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index 54b96520dce41d..d3a8f59c96f993 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -1021,16 +1021,10 @@ define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) {
 }
 
 define <8 x i16> @ld1_8h(<8 x i16> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_8h:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ld1.h { v0 }[0], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_8h:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr h1, [x0]
-; CHECK-GI-NEXT:    mov.h v0[0], v1[0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.h { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i16, ptr %bar
   %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
@@ -1038,16 +1032,10 @@ define <8 x i16> @ld1_8h(<8 x i16> %V, ptr %bar) {
 }
 
 define <4 x i32> @ld1_4s(<4 x i32> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_4s:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_4s:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
-; CHECK-GI-NEXT:    mov.s v0[0], v1[0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i32, ptr %bar
   %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
@@ -1055,16 +1043,10 @@ define <4 x i32> @ld1_4s(<4 x i32> %V, ptr %bar) {
 }
 
 define <4 x float> @ld1_4s_float(<4 x float> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_4s_float:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_4s_float:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
-; CHECK-GI-NEXT:    mov.s v0[0], v1[0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_4s_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load float, ptr %bar
   %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
@@ -1072,16 +1054,10 @@ define <4 x float> @ld1_4s_float(<4 x float> %V, ptr %bar) {
 }
 
 define <2 x i64> @ld1_2d(<2 x i64> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_2d:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ld1.d { v0 }[0], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_2d:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr d1, [x0]
-; CHECK-GI-NEXT:    mov.d v0[0], v1[0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.d { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i64, ptr %bar
   %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
@@ -1089,16 +1065,10 @@ define <2 x i64> @ld1_2d(<2 x i64> %V, ptr %bar) {
 }
 
 define <2 x double> @ld1_2d_double(<2 x double> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_2d_double:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ld1.d { v0 }[0], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_2d_double:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr d1, [x0]
-; CHECK-GI-NEXT:    mov.d v0[0], v1[0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_2d_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.d { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load double, ptr %bar
   %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
@@ -1137,20 +1107,12 @@ define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
 }
 
 define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_4h:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    ld1.h { v0 }[0], [x0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_4h:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr h1, [x0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov.h v0[0], v1[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.h { v0 }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i16, ptr %bar
   %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
@@ -1158,20 +1120,12 @@ define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
 }
 
 define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_2s:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_2s:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov.s v0[0], v1[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i32, ptr %bar
   %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
@@ -1179,20 +1133,12 @@ define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
 }
 
 define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_2s_float:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_2s_float:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov.s v0[0], v1[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_2s_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load float, ptr %bar
   %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0


        


More information about the llvm-commits mailing list