[llvm] [AArch64][GlobalISel] Adopt some Ld1Lane* patterns for GlobalISel to reduce codegen regressions (PR #69607)

Vladislav Dzhidzhoev via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 27 07:58:24 PDT 2023


https://github.com/dzhidzhoev updated https://github.com/llvm/llvm-project/pull/69607

>From 66e79e28dd2ba0e8017eefa6837e7533c54392cb Mon Sep 17 00:00:00 2001
From: Vladislav Dzhidzhoev <vdzhidzhoev at accesssoftek.com>
Date: Mon, 14 Aug 2023 11:16:04 +0200
Subject: [PATCH 1/2] [AArch64][GlobalISel] Adopt some Ld1Lane* patterns for
 GlobalISel to reduce codegen regressions

Additionally, ignore G_CONSTANTs between adjacent instructions in
isObviouslySafeToFold(). Without that, the new patterns don't have an
impact.
---
 .../GlobalISel/GIMatchTableExecutor.cpp       |  14 +-
 .../lib/Target/AArch64/AArch64InstrFormats.td |   3 +
 llvm/lib/Target/AArch64/AArch64InstrGISel.td  |  36 ++++
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |   2 +
 .../GISel/AArch64InstructionSelector.cpp      |  11 ++
 .../AArch64/arm64-indexed-vector-ldst.ll      |  80 ++++-----
 llvm/test/CodeGen/AArch64/arm64-ld1.ll        | 164 +++++-------------
 7 files changed, 140 insertions(+), 170 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
index 26752369a7711a1..1e8dffa26f9e125 100644
--- a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
@@ -16,6 +16,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
 
 #define DEBUG_TYPE "gi-match-table-executor"
 
@@ -62,9 +63,16 @@ bool GIMatchTableExecutor::isBaseWithConstantOffset(
 bool GIMatchTableExecutor::isObviouslySafeToFold(MachineInstr &MI,
                                                  MachineInstr &IntoMI) const {
   // Immediate neighbours are already folded.
-  if (MI.getParent() == IntoMI.getParent() &&
-      std::next(MI.getIterator()) == IntoMI.getIterator())
-    return true;
+  // Any G_CONSTANT between immediate neighbours can be ignored.
+  if (MI.getParent() == IntoMI.getParent()) {
+    auto IntoIt = IntoMI.getIterator();
+    auto NextIt = std::next(MI.getIterator());
+    while (!NextIt.isEnd() && NextIt != IntoIt &&
+           NextIt->getOpcode() == TargetOpcode::G_CONSTANT)
+      ++NextIt;
+    if (NextIt == IntoIt)
+      return true;
+  }
 
   // Convergent instructions cannot be moved in the CFG.
   if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index e5dbfa404b3c6bf..430ed2612f72e71 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -484,6 +484,9 @@ def UImmS8XForm : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i64);
 }]>;
 
+def gi_UImmS1XForm : GICustomOperandRenderer<"renderUImmS1">,
+  GISDNodeXFormEquiv<UImmS1XForm>;
+
 // uimm5sN predicate - True if the immediate is a multiple of N in the range
 // [0 * N, 32 * N].
 def UImm5s2Operand : UImmScaledMemoryIndexed<5, 2>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 27338bd24393325..1dd7ae90a24e670 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -271,6 +271,7 @@ def : GINodeEquiv<G_UMULL, AArch64umull>;
 def : GINodeEquiv<G_SMULL, AArch64smull>;
 
 def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
+def : GINodeEquiv<G_INSERT_VECTOR_ELT, insertelt>;
 
 def : GINodeEquiv<G_PREFETCH, AArch64Prefetch>;
 
@@ -528,3 +529,38 @@ def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
           (LD1Rv2d GPR64sp:$Rn)>;
 def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
           (LD1Rv1d GPR64sp:$Rn)>;
+
+class Ld1Lane64PatGISel<SDPatternOperator scalar_load, Operand VecIndex,
+                   ValueType VTy, ValueType STy, Instruction LD1>
+  : Pat<(insertelt (VTy VecListOne64:$Rd),
+           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
+        (EXTRACT_SUBREG
+            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
+                          (UImmS1XForm VecIndex:$idx), GPR64sp:$Rn),
+            dsub)>;
+
+class Ld1Lane128PatGISel<Operand VecIndex, ValueType VTy,
+                         ValueType STy, Instruction LD1>
+  : Pat<(insertelt (VTy VecListOne128:$Rd),
+           (STy (load GPR64sp:$Rn)), VecIndex:$idx),
+        (LD1 VecListOne128:$Rd, (UImmS1XForm VecIndex:$idx), GPR64sp:$Rn)>;
+
+// Enable these patterns only for GlobalISel, since
+// SelectionDAG analogues only select insertelt with i32 indices.
+let Predicates = [OnlyGISel] in {
+  def : Ld1Lane64PatGISel<load, VectorIndexB,    v8i8,  i8, LD1i8>;
+  def : Ld1Lane64PatGISel<load, VectorIndexB32b, v8i8,  i8, LD1i8>;
+  def : Ld1Lane64PatGISel<load, VectorIndexH,    v4i16, i16, LD1i16>;
+  def : Ld1Lane64PatGISel<load, VectorIndexH32b, v4i16, i16, LD1i16>;
+  def : Ld1Lane64PatGISel<load, VectorIndexS,    v2i32, i32, LD1i32>;
+  def : Ld1Lane64PatGISel<load, VectorIndexS32b, v2i32, i32, LD1i32>;
+
+  def : Ld1Lane128PatGISel<VectorIndexB, v16i8, i8, LD1i8>;
+  def : Ld1Lane128PatGISel<VectorIndexB32b, v16i8, i8, LD1i8>;
+  def : Ld1Lane128PatGISel<VectorIndexH, v8i16, i16, LD1i16>;
+  def : Ld1Lane128PatGISel<VectorIndexH32b, v8i16, i16, LD1i16>;
+  def : Ld1Lane128PatGISel<VectorIndexH, v4i32, i32, LD1i32>;
+  def : Ld1Lane128PatGISel<VectorIndexH32b, v4i32, i32, LD1i32>;
+  def : Ld1Lane128PatGISel<VectorIndexH, v2i64, i64, LD1i64>;
+  def : Ld1Lane128PatGISel<VectorIndexH32b, v2i64, i64, LD1i64>;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index df59dc4ad27fadb..08877329373b709 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -872,6 +872,8 @@ let RecomputePerFunction = 1 in {
 
   def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
   def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
+
+  def OnlyGISel : Predicate<"MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
   // Toggles patterns which aren't beneficial in GlobalISel when we aren't
   // optimizing. This allows us to selectively use patterns without impacting
   // SelectionDAG's behaviour.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 1c7a09696e853e2..212f6d2ee287a51 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -479,6 +479,9 @@ class AArch64InstructionSelector : public InstructionSelector {
                                     const MachineInstr &MI,
                                     int OpIdx = -1) const;
 
+  void renderUImmS1(MachineInstrBuilder &MIB, const MachineInstr &MI,
+                    int OpIdx = -1) const;
+
   // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
   void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
 
@@ -7593,6 +7596,14 @@ void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
                                                       .getZExtValue()));
 }
 
+void AArch64InstructionSelector::renderUImmS1(MachineInstrBuilder &MIB,
+                                              const MachineInstr &MI,
+                                              int OpIdx) const {
+  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+         "Expected G_CONSTANT");
+  MIB.addImm(MI.getOperand(1).getCImm()->getZExtValue());
+}
+
 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
     const MachineInstr &MI, unsigned NumBytes) const {
   if (!MI.mayLoadOrStore())
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 2cab4932def0724..9d4e4e36ba83d39 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -14283,10 +14283,9 @@ define <16 x i8> @test_v16i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <16 x i8> %A)
 ;
 ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr b1, [x0]
+; CHECK-GISEL-NEXT:    ld1.b { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    add x8, x0, #1
 ; CHECK-GISEL-NEXT:    str x8, [x1]
-; CHECK-GISEL-NEXT:    mov.b v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i8, ptr %bar
   %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
@@ -14304,10 +14303,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16
 ;
 ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr b1, [x0]
+; CHECK-GISEL-NEXT:    ld1.b { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    add x8, x0, x2
 ; CHECK-GISEL-NEXT:    str x8, [x1]
-; CHECK-GISEL-NEXT:    mov.b v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i8, ptr %bar
   %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
@@ -14327,11 +14325,10 @@ define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) {
 ;
 ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr b1, [x0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GISEL-NEXT:    add x8, x0, #1
+; CHECK-GISEL-NEXT:    ld1.b { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    str x8, [x1]
-; CHECK-GISEL-NEXT:    mov.b v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i8, ptr %bar
@@ -14352,11 +14349,10 @@ define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i
 ;
 ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr b1, [x0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GISEL-NEXT:    add x8, x0, x2
+; CHECK-GISEL-NEXT:    ld1.b { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    str x8, [x1]
-; CHECK-GISEL-NEXT:    mov.b v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i8, ptr %bar
@@ -14375,10 +14371,9 @@ define <8 x i16> @test_v8i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i16> %A)
 ;
 ; CHECK-GISEL-LABEL: test_v8i16_post_imm_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr h1, [x0]
+; CHECK-GISEL-NEXT:    ld1.h { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    add x8, x0, #2
 ; CHECK-GISEL-NEXT:    str x8, [x1]
-; CHECK-GISEL-NEXT:    mov.h v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i16, ptr %bar
   %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
@@ -14397,9 +14392,8 @@ define <8 x i16> @test_v8i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x
 ;
 ; CHECK-GISEL-LABEL: test_v8i16_post_reg_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr h1, [x0]
+; CHECK-GISEL-NEXT:    ld1.h { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GISEL-NEXT:    mov.h v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    str x8, [x1]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i16, ptr %bar
@@ -14420,11 +14414,10 @@ define <4 x i16> @test_v4i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i16> %A)
 ;
 ; CHECK-GISEL-LABEL: test_v4i16_post_imm_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr h1, [x0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GISEL-NEXT:    add x8, x0, #2
+; CHECK-GISEL-NEXT:    ld1.h { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    str x8, [x1]
-; CHECK-GISEL-NEXT:    mov.h v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i16, ptr %bar
@@ -14446,12 +14439,11 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
 ;
 ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr h1, [x0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GISEL-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GISEL-NEXT:    mov.h v0[1], v1[0]
-; CHECK-GISEL-NEXT:    str x8, [x1]
+; CHECK-GISEL-NEXT:    ld1.h { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT:    str x8, [x1]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i16, ptr %bar
   %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
@@ -14469,10 +14461,9 @@ define <4 x i32> @test_v4i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i32> %A)
 ;
 ; CHECK-GISEL-LABEL: test_v4i32_post_imm_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr s1, [x0]
+; CHECK-GISEL-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    add x8, x0, #4
 ; CHECK-GISEL-NEXT:    str x8, [x1]
-; CHECK-GISEL-NEXT:    mov.s v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i32, ptr %bar
   %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
@@ -14491,9 +14482,8 @@ define <4 x i32> @test_v4i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
 ;
 ; CHECK-GISEL-LABEL: test_v4i32_post_reg_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr s1, [x0]
+; CHECK-GISEL-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GISEL-NEXT:    mov.s v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    str x8, [x1]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i32, ptr %bar
@@ -14514,11 +14504,10 @@ define <2 x i32> @test_v2i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i32> %A)
 ;
 ; CHECK-GISEL-LABEL: test_v2i32_post_imm_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr s1, [x0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GISEL-NEXT:    add x8, x0, #4
+; CHECK-GISEL-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    str x8, [x1]
-; CHECK-GISEL-NEXT:    mov.s v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i32, ptr %bar
@@ -14540,12 +14529,11 @@ define <2 x i32> @test_v2i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x
 ;
 ; CHECK-GISEL-LABEL: test_v2i32_post_reg_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr s1, [x0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GISEL-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GISEL-NEXT:    mov.s v0[1], v1[0]
-; CHECK-GISEL-NEXT:    str x8, [x1]
+; CHECK-GISEL-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT:    str x8, [x1]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i32, ptr %bar
   %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
@@ -14563,10 +14551,9 @@ define <2 x i64> @test_v2i64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i64> %A)
 ;
 ; CHECK-GISEL-LABEL: test_v2i64_post_imm_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr d1, [x0]
+; CHECK-GISEL-NEXT:    ld1.d { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    add x8, x0, #8
 ; CHECK-GISEL-NEXT:    str x8, [x1]
-; CHECK-GISEL-NEXT:    mov.d v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i64, ptr %bar
   %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
@@ -14585,9 +14572,8 @@ define <2 x i64> @test_v2i64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x
 ;
 ; CHECK-GISEL-LABEL: test_v2i64_post_reg_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr d1, [x0]
+; CHECK-GISEL-NEXT:    ld1.d { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GISEL-NEXT:    mov.d v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    str x8, [x1]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i64, ptr %bar
@@ -14606,10 +14592,9 @@ define <4 x float> @test_v4f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x float>
 ;
 ; CHECK-GISEL-LABEL: test_v4f32_post_imm_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr s1, [x0]
+; CHECK-GISEL-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    add x8, x0, #4
 ; CHECK-GISEL-NEXT:    str x8, [x1]
-; CHECK-GISEL-NEXT:    mov.s v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load float, ptr %bar
   %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
@@ -14628,9 +14613,8 @@ define <4 x float> @test_v4f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4
 ;
 ; CHECK-GISEL-LABEL: test_v4f32_post_reg_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr s1, [x0]
+; CHECK-GISEL-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GISEL-NEXT:    mov.s v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    str x8, [x1]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load float, ptr %bar
@@ -14651,11 +14635,10 @@ define <2 x float> @test_v2f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x float>
 ;
 ; CHECK-GISEL-LABEL: test_v2f32_post_imm_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr s1, [x0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GISEL-NEXT:    add x8, x0, #4
+; CHECK-GISEL-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    str x8, [x1]
-; CHECK-GISEL-NEXT:    mov.s v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load float, ptr %bar
@@ -14677,12 +14660,11 @@ define <2 x float> @test_v2f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2
 ;
 ; CHECK-GISEL-LABEL: test_v2f32_post_reg_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr s1, [x0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GISEL-NEXT:    add x8, x0, x2, lsl #2
-; CHECK-GISEL-NEXT:    mov.s v0[1], v1[0]
-; CHECK-GISEL-NEXT:    str x8, [x1]
+; CHECK-GISEL-NEXT:    ld1.s { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-GISEL-NEXT:    str x8, [x1]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load float, ptr %bar
   %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
@@ -14700,10 +14682,9 @@ define <2 x double> @test_v2f64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x double
 ;
 ; CHECK-GISEL-LABEL: test_v2f64_post_imm_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr d1, [x0]
+; CHECK-GISEL-NEXT:    ld1.d { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    add x8, x0, #8
 ; CHECK-GISEL-NEXT:    str x8, [x1]
-; CHECK-GISEL-NEXT:    mov.d v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load double, ptr %bar
   %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
@@ -14722,9 +14703,8 @@ define <2 x double> @test_v2f64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <
 ;
 ; CHECK-GISEL-LABEL: test_v2f64_post_reg_ld1lane:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr d1, [x0]
+; CHECK-GISEL-NEXT:    ld1.d { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-GISEL-NEXT:    mov.d v0[1], v1[0]
 ; CHECK-GISEL-NEXT:    str x8, [x1]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load double, ptr %bar
@@ -14779,15 +14759,14 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr,
 ; CHECK-GISEL-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
 ; CHECK-GISEL:       ; %bb.0:
 ; CHECK-GISEL-NEXT:    add x8, x0, x2, lsl #1
-; CHECK-GISEL-NEXT:    ldr h1, [x0]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-GISEL-NEXT:    ld1.h { v0 }[1], [x0]
 ; CHECK-GISEL-NEXT:    str x8, [x1]
-; CHECK-GISEL-NEXT:    mov.h v0[1], v1[0]
-; CHECK-GISEL-NEXT:    ldr d2, [x3]
 ; CHECK-GISEL-NEXT:    ; kill: def $d0 killed $d0 killed $q0
-; CHECK-GISEL-NEXT:    cnt.8b v2, v2
-; CHECK-GISEL-NEXT:    uaddlp.4h v2, v2
-; CHECK-GISEL-NEXT:    uaddlp.2s v1, v2
+; CHECK-GISEL-NEXT:    ldr d1, [x3]
+; CHECK-GISEL-NEXT:    cnt.8b v1, v1
+; CHECK-GISEL-NEXT:    uaddlp.4h v1, v1
+; CHECK-GISEL-NEXT:    uaddlp.2s v1, v1
 ; CHECK-GISEL-NEXT:    str d1, [x3]
 ; CHECK-GISEL-NEXT:    ret
   %tmp1 = load i16, ptr %bar
@@ -14987,9 +14966,8 @@ define <4 x i32> @test_inc_cycle(<4 x i32> %vec, ptr %in) {
 ;
 ; CHECK-GISEL-LABEL: test_inc_cycle:
 ; CHECK-GISEL:       ; %bb.0:
-; CHECK-GISEL-NEXT:    ldr s1, [x0]
+; CHECK-GISEL-NEXT:    ld1.s { v0 }[0], [x0]
 ; CHECK-GISEL-NEXT:    adrp x9, _var at PAGE
-; CHECK-GISEL-NEXT:    mov.s v0[0], v1[0]
 ; CHECK-GISEL-NEXT:    fmov x8, d0
 ; CHECK-GISEL-NEXT:    add x8, x0, x8, lsl #2
 ; CHECK-GISEL-NEXT:    str x8, [x9, _var at PAGEOFF]
diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index 54b96520dce41d9..4cef1f966c838f4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -1004,16 +1004,10 @@ declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr) nounwin
 declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly
 
 define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_16b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ld1.b { v0 }[0], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_16b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr b1, [x0]
-; CHECK-GI-NEXT:    mov.b v0[0], v1[0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.b { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i8, ptr %bar
   %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
@@ -1021,16 +1015,10 @@ define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) {
 }
 
 define <8 x i16> @ld1_8h(<8 x i16> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_8h:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ld1.h { v0 }[0], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_8h:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr h1, [x0]
-; CHECK-GI-NEXT:    mov.h v0[0], v1[0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.h { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i16, ptr %bar
   %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
@@ -1038,16 +1026,10 @@ define <8 x i16> @ld1_8h(<8 x i16> %V, ptr %bar) {
 }
 
 define <4 x i32> @ld1_4s(<4 x i32> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_4s:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_4s:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
-; CHECK-GI-NEXT:    mov.s v0[0], v1[0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i32, ptr %bar
   %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
@@ -1055,16 +1037,10 @@ define <4 x i32> @ld1_4s(<4 x i32> %V, ptr %bar) {
 }
 
 define <4 x float> @ld1_4s_float(<4 x float> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_4s_float:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_4s_float:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
-; CHECK-GI-NEXT:    mov.s v0[0], v1[0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_4s_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load float, ptr %bar
   %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
@@ -1072,16 +1048,10 @@ define <4 x float> @ld1_4s_float(<4 x float> %V, ptr %bar) {
 }
 
 define <2 x i64> @ld1_2d(<2 x i64> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_2d:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ld1.d { v0 }[0], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_2d:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr d1, [x0]
-; CHECK-GI-NEXT:    mov.d v0[0], v1[0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.d { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i64, ptr %bar
   %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
@@ -1089,16 +1059,10 @@ define <2 x i64> @ld1_2d(<2 x i64> %V, ptr %bar) {
 }
 
 define <2 x double> @ld1_2d_double(<2 x double> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_2d_double:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ld1.d { v0 }[0], [x0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_2d_double:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr d1, [x0]
-; CHECK-GI-NEXT:    mov.d v0[0], v1[0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_2d_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1.d { v0 }[0], [x0]
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load double, ptr %bar
   %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
@@ -1116,20 +1080,12 @@ define <1 x i64> @ld1_1d(ptr %p) {
 }
 
 define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_8b:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    ld1.b { v0 }[0], [x0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_8b:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr b1, [x0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov.b v0[0], v1[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.b { v0 }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i8, ptr %bar
   %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
@@ -1137,20 +1093,12 @@ define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
 }
 
 define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_4h:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    ld1.h { v0 }[0], [x0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_4h:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr h1, [x0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov.h v0[0], v1[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.h { v0 }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i16, ptr %bar
   %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
@@ -1158,20 +1106,12 @@ define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
 }
 
 define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_2s:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_2s:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov.s v0[0], v1[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i32, ptr %bar
   %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
@@ -1179,20 +1119,12 @@ define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
 }
 
 define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_2s_float:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    ld1.s { v0 }[0], [x0]
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: ld1_2s_float:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr s1, [x0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov.s v0[0], v1[0]
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: ld1_2s_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load float, ptr %bar
   %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0

>From 2cc56b6eafaa26ce948ecf5f76130b04740d92cd Mon Sep 17 00:00:00 2001
From: Vladislav Dzhidzhoev <vdzhidzhoev at accesssoftek.com>
Date: Fri, 27 Oct 2023 16:58:05 +0200
Subject: [PATCH 2/2] Fix OnlyGISel condition

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 08877329373b709..571791d83b2f6c6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -873,7 +873,7 @@ let RecomputePerFunction = 1 in {
   def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
   def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
 
-  def OnlyGISel : Predicate<"MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
+  def OnlyGISel : Predicate<"!MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) && MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
   // Toggles patterns which aren't beneficial in GlobalISel when we aren't
   // optimizing. This allows us to selectively use patterns without impacting
   // SelectionDAG's behaviour.



More information about the llvm-commits mailing list