[llvm] d58bd21 - [GlobalISel] Look between instructions to be matched (#101675)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 27 08:56:44 PDT 2024
Author: chuongg3
Date: 2024-08-27T16:56:40+01:00
New Revision: d58bd21150914b4a2304c24b04a714bfb251c594
URL: https://github.com/llvm/llvm-project/commit/d58bd21150914b4a2304c24b04a714bfb251c594
DIFF: https://github.com/llvm/llvm-project/commit/d58bd21150914b4a2304c24b04a714bfb251c594.diff
LOG: [GlobalISel] Look between instructions to be matched (#101675)
When a pattern is matched in TableGen, a check called
isObviouslySafeToFold() is run. One of the conditions that it checks for is
whether the instructions that are being matched are consecutive, so the
instruction's insertion point does not change.
This patch allows the movement of the insertion point of a load
instruction if none of the intervening instructions are stores or have
side-effects.
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
llvm/test/CodeGen/AArch64/arm64-ld1.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
index 26752369a7711a..1a887ab5794b06 100644
--- a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
@@ -61,15 +61,42 @@ bool GIMatchTableExecutor::isBaseWithConstantOffset(
bool GIMatchTableExecutor::isObviouslySafeToFold(MachineInstr &MI,
MachineInstr &IntoMI) const {
+ auto IntoMIIter = IntoMI.getIterator();
+
// Immediate neighbours are already folded.
if (MI.getParent() == IntoMI.getParent() &&
- std::next(MI.getIterator()) == IntoMI.getIterator())
+ std::next(MI.getIterator()) == IntoMIIter)
return true;
// Convergent instructions cannot be moved in the CFG.
if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
return false;
- return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
- !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
+ if (MI.isLoadFoldBarrier())
+ return false;
+
+ // If the load is simple, check instructions between MI and IntoMI
+ if (MI.mayLoad() && MI.getParent() == IntoMI.getParent()) {
+ if (MI.memoperands_empty())
+ return false;
+ auto &MMO = **(MI.memoperands_begin());
+ if (MMO.isAtomic() || MMO.isVolatile())
+ return false;
+
+ // Ensure instructions between MI and IntoMI are not affected when combined
+ unsigned Iter = 0;
+ const unsigned MaxIter = 20;
+ for (auto &CurrMI :
+ instructionsWithoutDebug(MI.getIterator(), IntoMI.getIterator())) {
+ if (CurrMI.isLoadFoldBarrier())
+ return false;
+
+ if (Iter++ == MaxIter)
+ return false;
+ }
+
+ return true;
+ }
+
+ return true;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
index 3a46b2a943288b..20e1a93fe2e639 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
@@ -41,8 +41,10 @@
define void @anyext_on_fpr() { ret void }
define void @anyext_on_fpr8() { ret void }
-...
+ define void @load_s32_gpr_LD1() { ret void }
+ define void @load_s32_gpr_GIM() { ret void }
+...
---
name: load_s64_gpr
legalized: true
@@ -57,7 +59,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_s64_gpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 0 :: (load (s64) from %ir.addr)
; CHECK-NEXT: $x0 = COPY [[LDRXui]]
%0(p0) = COPY $x0
@@ -79,7 +83,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_s32_gpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from %ir.addr)
; CHECK-NEXT: $w0 = COPY [[LDRWui]]
%0(p0) = COPY $x0
@@ -97,7 +103,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_s16_gpr_anyext
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr)
; CHECK-NEXT: $w0 = COPY [[LDRHHui]]
%0:gpr(p0) = COPY $x0
@@ -119,7 +127,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_s16_gpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -139,7 +149,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_s8_gpr_anyext
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8) from %ir.addr)
; CHECK-NEXT: $w0 = COPY [[LDRBBui]]
%0:gpr(p0) = COPY $x0
@@ -161,7 +173,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_s8_gpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8) from %ir.addr)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -188,7 +202,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_fi_s64_gpr
- ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.0.ptr0, 0 :: (load (s64))
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.0.ptr0, 0 :: (load (s64))
; CHECK-NEXT: $x0 = COPY [[LDRXui]]
%0(p0) = G_FRAME_INDEX %stack.0.ptr0
%1(s64) = G_LOAD %0 :: (load (s64))
@@ -211,7 +227,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_gep_128_s64_gpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 16 :: (load (s64) from %ir.addr)
; CHECK-NEXT: $x0 = COPY [[LDRXui]]
%0(p0) = COPY $x0
@@ -237,7 +255,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_gep_512_s32_gpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 128 :: (load (s32) from %ir.addr)
; CHECK-NEXT: $w0 = COPY [[LDRWui]]
%0(p0) = COPY $x0
@@ -263,7 +283,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_gep_64_s16_gpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 32 :: (load (s16) from %ir.addr)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]]
; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -291,7 +313,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_gep_1_s8_gpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 1 :: (load (s8) from %ir.addr)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]]
; CHECK-NEXT: $w0 = COPY [[COPY1]]
@@ -317,7 +341,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_s64_fpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load (s64) from %ir.addr)
; CHECK-NEXT: $d0 = COPY [[LDRDui]]
%0(p0) = COPY $x0
@@ -339,7 +365,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_s32_fpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 0 :: (load (s32) from %ir.addr)
; CHECK-NEXT: $s0 = COPY [[LDRSui]]
%0(p0) = COPY $x0
@@ -361,7 +389,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_s16_fpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0 :: (load (s16) from %ir.addr)
; CHECK-NEXT: $h0 = COPY [[LDRHui]]
%0(p0) = COPY $x0
@@ -383,7 +413,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_s8_fpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8) from %ir.addr)
; CHECK-NEXT: $b0 = COPY [[LDRBui]]
%0(p0) = COPY $x0
@@ -407,7 +439,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_gep_8_s64_fpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 1 :: (load (s64) from %ir.addr)
; CHECK-NEXT: $d0 = COPY [[LDRDui]]
%0(p0) = COPY $x0
@@ -433,7 +467,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_gep_16_s32_fpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 4 :: (load (s32) from %ir.addr)
; CHECK-NEXT: $s0 = COPY [[LDRSui]]
%0(p0) = COPY $x0
@@ -459,7 +495,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_gep_64_s16_fpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 32 :: (load (s16) from %ir.addr)
; CHECK-NEXT: $h0 = COPY [[LDRHui]]
%0(p0) = COPY $x0
@@ -485,7 +523,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_gep_32_s8_fpr
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 32 :: (load (s8) from %ir.addr)
; CHECK-NEXT: $b0 = COPY [[LDRBui]]
%0(p0) = COPY $x0
@@ -508,7 +548,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_v2s32
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load (<2 x s32>) from %ir.addr)
; CHECK-NEXT: $d0 = COPY [[LDRDui]]
%0(p0) = COPY $x0
@@ -529,7 +571,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: load_v2s64
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (<2 x s64>) from %ir.addr)
; CHECK-NEXT: $q0 = COPY [[LDRQui]]
%0(p0) = COPY $x0
@@ -712,3 +756,63 @@ body: |
RET_ReallyLR
...
+---
+name: load_s32_gpr_LD1
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $q0, $x0
+
+ ; CHECK-LABEL: name: load_s32_gpr_LD1
+ ; CHECK: liveins: $q0, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[LD1i32_:%[0-9]+]]:fpr128 = LD1i32 [[COPY]], 0, [[COPY1]] :: (load (s32))
+ ; CHECK-NEXT: $q0 = COPY [[LD1i32_]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:fpr(<4 x s32>) = COPY $q0
+ %1:gpr(p0) = COPY $x0
+ %2:fpr(s32) = G_LOAD %1(p0) :: (load (s32))
+ %3:gpr(s32) = G_CONSTANT i32 3
+ %5:gpr(s64) = G_CONSTANT i64 0
+ %4:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %2(s32), %5(s64)
+ $q0 = COPY %4(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+
+name: load_s32_gpr_GIM
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $q0, $x0
+ ;This test should not select an LD1 instruction as there is a store instruction between G_INSERT_VECTOR_ELT and G_LOAD
+ ; CHECK-LABEL: name: load_s32_gpr_GIM
+ ; CHECK: liveins: $q0, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY1]], 0 :: (load (s32))
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 3
+ ; CHECK-NEXT: STRWui [[MOVi32imm]], [[COPY1]], 0 :: (store (s32))
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[LDRSui]], %subreg.ssub
+ ; CHECK-NEXT: [[INSvi32lane:%[0-9]+]]:fpr128 = INSvi32lane [[COPY]], 0, [[INSERT_SUBREG]], 0
+ ; CHECK-NEXT: $q0 = COPY [[INSvi32lane]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:fpr(<4 x s32>) = COPY $q0
+ %1:gpr(p0) = COPY $x0
+ %2:fpr(s32) = G_LOAD %1(p0) :: (load (s32))
+ %3:gpr(s32) = G_CONSTANT i32 3
+ G_STORE %3(s32), %1(p0) :: (store (s32))
+ %5:gpr(s64) = G_CONSTANT i64 0
+ %4:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %2(s32), %5(s64)
+ $q0 = COPY %4(<4 x s32>)
+ RET_ReallyLR implicit $q0
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 628fb550a0532b..720951eca6a344 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -13417,9 +13417,8 @@ define <8 x i16> @test_v8i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld1lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ldr h1, [x0]
+; CHECK-GI-NEXT: ld1.h { v0 }[1], [x0]
; CHECK-GI-NEXT: add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT: mov.h v0[1], v1[0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%tmp1 = load i16, ptr %bar
@@ -13465,12 +13464,11 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld1lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ldr h1, [x0]
; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT: mov.h v0[1], v1[0]
-; CHECK-GI-NEXT: str x8, [x1]
+; CHECK-GI-NEXT: ld1.h { v0 }[1], [x0]
; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%tmp1 = load i16, ptr %bar
%tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
@@ -13509,9 +13507,8 @@ define <4 x i32> @test_v4i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld1lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ldr s1, [x0]
+; CHECK-GI-NEXT: ld1.s { v0 }[1], [x0]
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT: mov.s v0[1], v1[0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%tmp1 = load i32, ptr %bar
@@ -13557,12 +13554,11 @@ define <2 x i32> @test_v2i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld1lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ldr s1, [x0]
; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT: mov.s v0[1], v1[0]
-; CHECK-GI-NEXT: str x8, [x1]
+; CHECK-GI-NEXT: ld1.s { v0 }[1], [x0]
; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%tmp1 = load i32, ptr %bar
%tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
@@ -13601,9 +13597,8 @@ define <2 x i64> @test_v2i64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld1lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ldr d1, [x0]
+; CHECK-GI-NEXT: ld1.d { v0 }[1], [x0]
; CHECK-GI-NEXT: add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT: mov.d v0[1], v1[0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%tmp1 = load i64, ptr %bar
@@ -13643,9 +13638,8 @@ define <4 x float> @test_v4f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld1lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ldr s1, [x0]
+; CHECK-GI-NEXT: ld1.s { v0 }[1], [x0]
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT: mov.s v0[1], v1[0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%tmp1 = load float, ptr %bar
@@ -13691,12 +13685,11 @@ define <2 x float> @test_v2f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld1lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ldr s1, [x0]
; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: add x8, x0, x2, lsl #2
-; CHECK-GI-NEXT: mov.s v0[1], v1[0]
-; CHECK-GI-NEXT: str x8, [x1]
+; CHECK-GI-NEXT: ld1.s { v0 }[1], [x0]
; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%tmp1 = load float, ptr %bar
%tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
@@ -13735,9 +13728,8 @@ define <2 x double> @test_v2f64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld1lane:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ldr d1, [x0]
+; CHECK-GI-NEXT: ld1.d { v0 }[1], [x0]
; CHECK-GI-NEXT: add x8, x0, x2, lsl #3
-; CHECK-GI-NEXT: mov.d v0[1], v1[0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: ret
%tmp1 = load double, ptr %bar
@@ -13792,15 +13784,14 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr,
; CHECK-GI-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: add x8, x0, x2, lsl #1
-; CHECK-GI-NEXT: ldr h1, [x0]
; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: ld1.h { v0 }[1], [x0]
; CHECK-GI-NEXT: str x8, [x1]
-; CHECK-GI-NEXT: mov.h v0[1], v1[0]
-; CHECK-GI-NEXT: ldr d2, [x3]
; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: cnt.8b v2, v2
-; CHECK-GI-NEXT: uaddlp.4h v2, v2
-; CHECK-GI-NEXT: uaddlp.2s v1, v2
+; CHECK-GI-NEXT: ldr d1, [x3]
+; CHECK-GI-NEXT: cnt.8b v1, v1
+; CHECK-GI-NEXT: uaddlp.4h v1, v1
+; CHECK-GI-NEXT: uaddlp.2s v1, v1
; CHECK-GI-NEXT: str d1, [x3]
; CHECK-GI-NEXT: ret
%tmp1 = load i16, ptr %bar
@@ -13989,24 +13980,14 @@ define void @test_ld1lane_build_i8(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr
}
define <4 x i32> @test_inc_cycle(<4 x i32> %vec, ptr %in) {
-; CHECK-SD-LABEL: test_inc_cycle:
-; CHECK-SD: ; %bb.0:
-; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT: adrp x9, _var at PAGE
-; CHECK-SD-NEXT: fmov x8, d0
-; CHECK-SD-NEXT: add x8, x0, x8, lsl #2
-; CHECK-SD-NEXT: str x8, [x9, _var at PAGEOFF]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_inc_cycle:
-; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ldr s1, [x0]
-; CHECK-GI-NEXT: adrp x9, _var at PAGE
-; CHECK-GI-NEXT: mov.s v0[0], v1[0]
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: add x8, x0, x8, lsl #2
-; CHECK-GI-NEXT: str x8, [x9, _var at PAGEOFF]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_inc_cycle:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ld1.s { v0 }[0], [x0]
+; CHECK-NEXT: adrp x9, _var at PAGE
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: add x8, x0, x8, lsl #2
+; CHECK-NEXT: str x8, [x9, _var at PAGEOFF]
+; CHECK-NEXT: ret
%elt = load i32, ptr %in
%newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0
diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index 54b96520dce41d..d3a8f59c96f993 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -1021,16 +1021,10 @@ define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) {
}
define <8 x i16> @ld1_8h(<8 x i16> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_8h:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ld1.h { v0 }[0], [x0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ld1_8h:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr h1, [x0]
-; CHECK-GI-NEXT: mov.h v0[0], v1[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ld1_8h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1.h { v0 }[0], [x0]
+; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
%tmp1 = load i16, ptr %bar
%tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
@@ -1038,16 +1032,10 @@ define <8 x i16> @ld1_8h(<8 x i16> %V, ptr %bar) {
}
define <4 x i32> @ld1_4s(<4 x i32> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_4s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ld1_4s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr s1, [x0]
-; CHECK-GI-NEXT: mov.s v0[0], v1[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ld1_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1.s { v0 }[0], [x0]
+; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
%tmp1 = load i32, ptr %bar
%tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
@@ -1055,16 +1043,10 @@ define <4 x i32> @ld1_4s(<4 x i32> %V, ptr %bar) {
}
define <4 x float> @ld1_4s_float(<4 x float> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_4s_float:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ld1_4s_float:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr s1, [x0]
-; CHECK-GI-NEXT: mov.s v0[0], v1[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ld1_4s_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1.s { v0 }[0], [x0]
+; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
%tmp1 = load float, ptr %bar
%tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
@@ -1072,16 +1054,10 @@ define <4 x float> @ld1_4s_float(<4 x float> %V, ptr %bar) {
}
define <2 x i64> @ld1_2d(<2 x i64> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_2d:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ld1.d { v0 }[0], [x0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ld1_2d:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr d1, [x0]
-; CHECK-GI-NEXT: mov.d v0[0], v1[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ld1_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1.d { v0 }[0], [x0]
+; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
%tmp1 = load i64, ptr %bar
%tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
@@ -1089,16 +1065,10 @@ define <2 x i64> @ld1_2d(<2 x i64> %V, ptr %bar) {
}
define <2 x double> @ld1_2d_double(<2 x double> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_2d_double:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ld1.d { v0 }[0], [x0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ld1_2d_double:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr d1, [x0]
-; CHECK-GI-NEXT: mov.d v0[0], v1[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ld1_2d_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1.d { v0 }[0], [x0]
+; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
%tmp1 = load double, ptr %bar
%tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
@@ -1137,20 +1107,12 @@ define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
}
define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_4h:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: ld1.h { v0 }[0], [x0]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ld1_4h:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr h1, [x0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov.h v0[0], v1[0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ld1_4h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: ld1.h { v0 }[0], [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
%tmp1 = load i16, ptr %bar
%tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
@@ -1158,20 +1120,12 @@ define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
}
define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_2s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ld1_2s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr s1, [x0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov.s v0[0], v1[0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ld1_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: ld1.s { v0 }[0], [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
%tmp1 = load i32, ptr %bar
%tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
@@ -1179,20 +1133,12 @@ define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
}
define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_2s_float:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ld1_2s_float:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr s1, [x0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov.s v0[0], v1[0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ld1_2s_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: ld1.s { v0 }[0], [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
%tmp1 = load float, ptr %bar
%tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0
More information about the llvm-commits
mailing list