[llvm] [GlobalISel] Look between instructions to be matched (PR #101675)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 2 06:47:34 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-globalisel

Author: None (chuongg3)

<details>
<summary>Changes</summary>

When a pattern is matched in TableGen, a check is run called isObviouslySafeToFold(). One of the condition that it checks for is whether the instructions that are being matched are consecutive, so the instruction's insertion point does not change.

This patch allows the movement of the insertion point of a load instruction if none of the intervening instructions are stores or have side-effects.

---

Patch is 26.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101675.diff


4 Files Affected:

- (modified) llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp (+31-2) 
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir (+126-22) 
- (modified) llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll (+24-43) 
- (modified) llvm/test/CodeGen/AArch64/arm64-ld1.ll (+38-92) 


``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
index 26752369a7711..8fd644d6ce7d8 100644
--- a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
@@ -70,6 +70,35 @@ bool GIMatchTableExecutor::isObviouslySafeToFold(MachineInstr &MI,
   if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
     return false;
 
-  return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
-         !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
+  if (!MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
+      !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty()) {
+    return true;
+  }
+
+  // If the load is simple, check instructions between MI and IntoMI
+  auto CurrMI = MI.getIterator();
+  if (MI.mayLoad() && MI.getParent() == IntoMI.getParent()) {
+    if (MI.memoperands_empty())
+      return false;
+    auto MMO = **(MI.memoperands_begin());
+    if (MMO.isAtomic() || MMO.isVolatile())
+      return false;
+
+    // Ensure instructions between MI and IntoMI sare not affected when combined
+    for (unsigned i = 1; i < 15 && CurrMI != IntoMI.getIterator(); i++) {
+      if (CurrMI->mayStore() || CurrMI->mayRaiseFPException() ||
+          CurrMI->hasUnmodeledSideEffects() ||
+          !CurrMI->implicit_operands().empty()) {
+        return false;
+      }
+
+      CurrMI = std::next(CurrMI);
+    }
+
+    // If there are more than 15 instructions between MI and IntoMI, return
+    // false
+    return CurrMI == IntoMI.getIterator();
+  }
+
+  return false;
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
index 3a46b2a943288..20e1a93fe2e63 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
@@ -41,8 +41,10 @@
   define void @anyext_on_fpr() { ret void }
   define void @anyext_on_fpr8() { ret void }
 
-...
+  define void @load_s32_gpr_LD1() { ret void }
+  define void @load_s32_gpr_GIM() { ret void }
 
+...
 ---
 name:            load_s64_gpr
 legalized:       true
@@ -57,7 +59,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s64_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 0 :: (load (s64) from %ir.addr)
     ; CHECK-NEXT: $x0 = COPY [[LDRXui]]
     %0(p0) = COPY $x0
@@ -79,7 +83,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s32_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from %ir.addr)
     ; CHECK-NEXT: $w0 = COPY [[LDRWui]]
     %0(p0) = COPY $x0
@@ -97,7 +103,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s16_gpr_anyext
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr)
     ; CHECK-NEXT: $w0 = COPY [[LDRHHui]]
     %0:gpr(p0) = COPY $x0
@@ -119,7 +127,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s16_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr)
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
     ; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -139,7 +149,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s8_gpr_anyext
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8) from %ir.addr)
     ; CHECK-NEXT: $w0 = COPY [[LDRBBui]]
     %0:gpr(p0) = COPY $x0
@@ -161,7 +173,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s8_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8) from %ir.addr)
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
     ; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -188,7 +202,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_fi_s64_gpr
-    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.0.ptr0, 0 :: (load (s64))
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.0.ptr0, 0 :: (load (s64))
     ; CHECK-NEXT: $x0 = COPY [[LDRXui]]
     %0(p0) = G_FRAME_INDEX %stack.0.ptr0
     %1(s64) = G_LOAD %0 :: (load (s64))
@@ -211,7 +227,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_128_s64_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 16 :: (load (s64) from %ir.addr)
     ; CHECK-NEXT: $x0 = COPY [[LDRXui]]
     %0(p0) = COPY $x0
@@ -237,7 +255,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_512_s32_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 128 :: (load (s32) from %ir.addr)
     ; CHECK-NEXT: $w0 = COPY [[LDRWui]]
     %0(p0) = COPY $x0
@@ -263,7 +283,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_64_s16_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 32 :: (load (s16) from %ir.addr)
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
     ; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -291,7 +313,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_1_s8_gpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 1 :: (load (s8) from %ir.addr)
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
     ; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -317,7 +341,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s64_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load (s64) from %ir.addr)
     ; CHECK-NEXT: $d0 = COPY [[LDRDui]]
     %0(p0) = COPY $x0
@@ -339,7 +365,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s32_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 0 :: (load (s32) from %ir.addr)
     ; CHECK-NEXT: $s0 = COPY [[LDRSui]]
     %0(p0) = COPY $x0
@@ -361,7 +389,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s16_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0 :: (load (s16) from %ir.addr)
     ; CHECK-NEXT: $h0 = COPY [[LDRHui]]
     %0(p0) = COPY $x0
@@ -383,7 +413,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_s8_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8) from %ir.addr)
     ; CHECK-NEXT: $b0 = COPY [[LDRBui]]
     %0(p0) = COPY $x0
@@ -407,7 +439,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_8_s64_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 1 :: (load (s64) from %ir.addr)
     ; CHECK-NEXT: $d0 = COPY [[LDRDui]]
     %0(p0) = COPY $x0
@@ -433,7 +467,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_16_s32_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 4 :: (load (s32) from %ir.addr)
     ; CHECK-NEXT: $s0 = COPY [[LDRSui]]
     %0(p0) = COPY $x0
@@ -459,7 +495,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_64_s16_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 32 :: (load (s16) from %ir.addr)
     ; CHECK-NEXT: $h0 = COPY [[LDRHui]]
     %0(p0) = COPY $x0
@@ -485,7 +523,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_gep_32_s8_fpr
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 32 :: (load (s8) from %ir.addr)
     ; CHECK-NEXT: $b0 = COPY [[LDRBui]]
     %0(p0) = COPY $x0
@@ -508,7 +548,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_v2s32
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load (<2 x s32>) from %ir.addr)
     ; CHECK-NEXT: $d0 = COPY [[LDRDui]]
     %0(p0) = COPY $x0
@@ -529,7 +571,9 @@ body:             |
     liveins: $x0
 
     ; CHECK-LABEL: name: load_v2s64
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
     ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (<2 x s64>) from %ir.addr)
     ; CHECK-NEXT: $q0 = COPY [[LDRQui]]
     %0(p0) = COPY $x0
@@ -712,3 +756,63 @@ body:             |
     RET_ReallyLR
 
 ...
+---
+name:            load_s32_gpr_LD1
+legalized:       true
+regBankSelected: true
+
+body:             |
+  bb.0:
+    liveins: $q0, $x0
+
+    ; CHECK-LABEL: name: load_s32_gpr_LD1
+    ; CHECK: liveins: $q0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: [[LD1i32_:%[0-9]+]]:fpr128 = LD1i32 [[COPY]], 0, [[COPY1]] :: (load (s32))
+    ; CHECK-NEXT: $q0 = COPY [[LD1i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:fpr(<4 x s32>) = COPY $q0
+    %1:gpr(p0) = COPY $x0
+    %2:fpr(s32) = G_LOAD %1(p0) :: (load (s32))
+    %3:gpr(s32) = G_CONSTANT i32 3
+    %5:gpr(s64) = G_CONSTANT i64 0
+    %4:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %2(s32), %5(s64)
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+
+name:            load_s32_gpr_GIM
+legalized:       true
+regBankSelected: true
+
+body:             |
+  bb.0:
+    liveins: $q0, $x0
+    ;This test should not select an LD1 instruction as there is a store instruction between G_INSERT_VECTOR_ELT and G_LOAD 
+    ; CHECK-LABEL: name: load_s32_gpr_GIM
+    ; CHECK: liveins: $q0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY1]], 0 :: (load (s32))
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 3
+    ; CHECK-NEXT: STRWui [[MOVi32imm]], [[COPY1]], 0 :: (store (s32))
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[LDRSui]], %subreg.ssub
+    ; CHECK-NEXT: [[INSvi32lane:%[0-9]+]]:fpr128 = INSvi32lane [[COPY]], 0, [[INSERT_SUBREG]], 0
+    ; CHECK-NEXT: $q0 = COPY [[INSvi32lane]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:fpr(<4 x s32>) = COPY $q0
+    %1:gpr(p0) = COPY $x0
+    %2:fpr(s32) = G_LOAD %1(p0) :: (load (s32))
+    %3:gpr(s32) = G_CONSTANT i32 3
+    G_STORE %3(s32), %1(p0) :: (store (s32))
+    %5:gpr(s64) = G_CONSTANT i64 0
+    %4:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %2(s32), %5(s64)
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 628fb550a0532..720951eca6a34 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -13417,9 +13417,8 @@ define <8 x i16> @test_v8i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr h1, [x0]
+; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT:    mov.h v0[1], v1[0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i16, ptr %bar
@@ -13465,12 +13464,11 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr h1, [x0]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT:    mov.h v0[1], v1[0]
-; CHECK-GI-NEXT:    str x8, [x1]
+; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i16, ptr %bar
   %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
@@ -13509,9 +13507,8 @@ define <4 x i32> @test_v4i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
+; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i32, ptr %bar
@@ -13557,12 +13554,11 @@ define <2 x i32> @test_v2i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
-; CHECK-GI-NEXT:    str x8, [x1]
+; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i32, ptr %bar
   %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
@@ -13601,9 +13597,8 @@ define <2 x i64> @test_v2i64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr d1, [x0]
+; CHECK-GI-NEXT:    ld1.d { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i64, ptr %bar
@@ -13643,9 +13638,8 @@ define <4 x float> @test_v4f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
+; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load float, ptr %bar
@@ -13691,12 +13685,11 @@ define <2 x float> @test_v2f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
-; CHECK-GI-NEXT:    str x8, [x1]
+; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load float, ptr %bar
   %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
@@ -13735,9 +13728,8 @@ define <2 x double> @test_v2f64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr d1, [x0]
+; CHECK-GI-NEXT:    ld1.d { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load double, ptr %bar
@@ -13792,15 +13784,14 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr,
 ; CHECK-GI-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT:    ldr h1, [x0]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
-; CHECK-GI-NEXT:    mov.h v0[1], v1[0]
-; CHECK-GI-NEXT:    ldr d2, [x3]
 ; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    cnt.8b v2, v2
-; CHECK-GI-NEXT:    uaddlp.4h v2, v2
-; CHECK-GI-NEXT:    uaddlp.2s v1, v2
+; CHECK-GI-NEXT:    ldr d1, [x3]
+; CHECK-GI-NEXT:    cnt.8b v1, v1
+; CHECK-GI-NEXT:    uaddlp.4h v1, v1
+; CHECK-GI-NEXT:    uaddlp.2s v1, v1
 ; CHECK-GI-NEXT:    str d1, [x3]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i16, ptr %bar
@@ -13989,24 +13980,14 @@ define void  @test_ld1lane_build_i8(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr
 }
 
 define <4 x i32> @test_inc_cycle(<4 x i32> %vec, ptr %in) {
-; CHECK-SD-LABEL: test_inc_cycle:
-; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT:    adrp x9, _var at PAGE
-; CHECK-SD-NEXT:    fmov x8, d0
-; CHECK-SD-NEXT:    add x8, x0, x8, lsl #2
-; CHECK-SD-NEXT:    str x8, [x9, _var at PAGEOFF]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_inc_cycle:
-; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
-; CHECK-GI-NEXT:    adrp x9, _var at PAGE
-; CHECK-GI-NEXT:    mov.s v0[0], v1[0]
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    add x8, x0, x8, lsl #2
-; CHECK-GI-NEXT:    str x8, [x9, _var at PAGEOFF]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_inc_cycle:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    adrp x9, _var at PAGE
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x8, x0, x8, lsl #2
+; CHECK-NEXT:    str x8, [x9, _var at PAGEOFF]
+; CHECK-NEXT:    ret
   %elt = load i32, ptr %in
   %newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index 54b96520dce41..d3a8f59c96f99 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -1021,16 +1021,10 @@ define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) {
 }
 
 define <8 x i16> @ld1_8h(<8 x i16> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_8h:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ld1.h { v0 }[0], [x0]
-; CHECK-SD-NEXT: ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/101675


More information about the llvm-commits mailing list