[llvm] 626039c - [AArch64] Split fuse-literals feature

Alexander Shaposhnikov via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 10 22:28:05 PDT 2022


Author: Alexander Shaposhnikov
Date: 2022-04-11T05:27:11Z
New Revision: 626039cdcc16b429c4403d36fad13fba2a6c14e9

URL: https://github.com/llvm/llvm-project/commit/626039cdcc16b429c4403d36fad13fba2a6c14e9
DIFF: https://github.com/llvm/llvm-project/commit/626039cdcc16b429c4403d36fad13fba2a6c14e9.diff

LOG: [AArch64] Split fuse-literals feature

This diff splits the fuse-literals feature and enables fuse-adrp-add by default;
in particular, it adjusts instruction scheduling to place ADRP+ADD pairs together.
This also enables the linker to apply the relaxations described in
https://github.com/ARM-software/abi-aa/commit/d2ca58c54b8e955cfef25c71822f837ae0439d73.

Differential revision: https://reviews.llvm.org/D120104

Test plan: make check-all

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64.td
    llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
    llvm/lib/Target/AArch64/AArch64Subtarget.h
    llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-of-shifted-dbg-value-fallback.ll
    llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
    llvm/test/CodeGen/AArch64/fold-global-offsets.ll
    llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
    llvm/test/CodeGen/AArch64/jump-table-32.ll
    llvm/test/CodeGen/AArch64/machine-outliner-throw.ll
    llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
    llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
    llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll
    llvm/test/CodeGen/AArch64/sve-vector-splat.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 82161b162ecdf..2abf2da6bc700 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -250,6 +250,10 @@ def FeatureFuseCryptoEOR : SubtargetFeature<
     "fuse-crypto-eor", "HasFuseCryptoEOR", "true",
     "CPU fuses AES/PMULL and EOR operations">;
 
+def FeatureFuseAdrpAdd : SubtargetFeature<
+    "fuse-adrp-add", "HasFuseAdrpAdd", "true",
+    "CPU fuses adrp+add operations">;
+
 def FeatureFuseLiterals : SubtargetFeature<
     "fuse-literals", "HasFuseLiterals", "true",
     "CPU fuses literal generation operations">;
@@ -660,6 +664,7 @@ def TuneA57     : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
                                    FeatureFuseAES,
                                    FeatureBalanceFPOps,
                                    FeatureCustomCheapAsMoveHandling,
+                                   FeatureFuseAdrpAdd,
                                    FeatureFuseLiterals,
                                    FeaturePostRAScheduler,
                                    FeaturePredictableSelectIsExpensive]>;
@@ -668,11 +673,13 @@ def TuneA65     : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65",
                                    "Cortex-A65 ARM processors", [
                                    FeatureFuseAES,
                                    FeatureFuseAddress,
+                                   FeatureFuseAdrpAdd,
                                    FeatureFuseLiterals]>;
 
 def TuneA72     : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
                                    "Cortex-A72 ARM processors", [
                                    FeatureFuseAES,
+                                   FeatureFuseAdrpAdd,
                                    FeatureFuseLiterals]>;
 
 def TuneA73     : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
@@ -813,6 +820,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
                                     FeatureFuseArithmeticLogic,
                                     FeatureFuseCCSelect,
                                     FeatureFuseCryptoEOR,
+                                    FeatureFuseAdrpAdd,
                                     FeatureFuseLiterals,
                                     FeatureZCRegMove,
                                     FeatureZCZeroing]>;
@@ -824,6 +832,7 @@ def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
                                      FeatureFuseAddress,
                                      FeatureFuseAES,
                                      FeatureFuseCCSelect,
+                                     FeatureFuseAdrpAdd,
                                      FeatureFuseLiterals,
                                      FeatureLSLFast,
                                      FeaturePostRAScheduler,
@@ -840,6 +849,7 @@ def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
                                      FeatureFuseAES,
                                      FeatureFuseArithmeticLogic,
                                      FeatureFuseCCSelect,
+                                     FeatureFuseAdrpAdd,
                                      FeatureFuseLiterals,
                                      FeatureLSLFast,
                                      FeaturePostRAScheduler,
@@ -1062,7 +1072,7 @@ def ProcessorFeatures {
   // by default for users targeting generic AArch64. The extensions do not
   // affect code generated by the compiler and can be used only by explicitly
   // mentioning the new system register names in assembly.
-  list<SubtargetFeature> Generic = [FeatureFPARMv8, FeatureNEON, FeatureETE];
+  list<SubtargetFeature> Generic = [FeatureFPARMv8, FeatureNEON, FeatureETE, FeatureFuseAdrpAdd];
 }
 
 

diff  --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index e8217eaf6ed5c..c7657f37d16d9 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -157,16 +157,19 @@ static bool isCryptoEORPair(const MachineInstr *FirstMI,
   return false;
 }
 
-/// Literal generation.
-static bool isLiteralsPair(const MachineInstr *FirstMI,
-                           const MachineInstr &SecondMI) {
+static bool isAdrpAddPair(const MachineInstr *FirstMI,
+                          const MachineInstr &SecondMI) {
   // Assume the 1st instr to be a wildcard if it is unspecified.
-
-  // PC relative address.
   if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::ADRP) &&
       SecondMI.getOpcode() == AArch64::ADDXri)
     return true;
+  return false;
+}
 
+/// Literal generation.
+static bool isLiteralsPair(const MachineInstr *FirstMI,
+                           const MachineInstr &SecondMI) {
+  // Assume the 1st instr to be a wildcard if it is unspecified.
   // 32 bit immediate.
   if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZWi) &&
       (SecondMI.getOpcode() == AArch64::MOVKWi &&
@@ -397,6 +400,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
     return true;
   if (ST.hasFuseCryptoEOR() && isCryptoEORPair(FirstMI, SecondMI))
     return true;
+  if (ST.hasFuseAdrpAdd() && isAdrpAddPair(FirstMI, SecondMI))
+    return true;
   if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI))
     return true;
   if (ST.hasFuseAddress() && isAddressLdStPair(FirstMI, SecondMI))

diff  --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index ceb92582dbdb3..e919263f92794 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -204,8 +204,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   /// Return true if the CPU supports any kind of instruction fusion.
   bool hasFusion() const {
     return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
-           hasFuseAES() || hasFuseArithmeticLogic() ||
-           hasFuseCCSelect() || hasFuseLiterals();
+           hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
+           hasFuseAdrpAdd() || hasFuseLiterals();
   }
 
   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-of-shifted-dbg-value-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-of-shifted-dbg-value-fallback.ll
index c146138e603e8..eb41722839d82 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-of-shifted-dbg-value-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-of-shifted-dbg-value-fallback.ll
@@ -56,10 +56,9 @@ define void @baz(i8* %arg) !dbg !6 {
 ; CHECK-NEXT:    lsl x8, x0, #4
 ; CHECK-NEXT:    .loc 1 0 0 is_stmt 0 // tmp.ll:0:0
 ; CHECK-NEXT:    adrp x9, global+202752
+; CHECK-NEXT:    add x9, x9, :lo12:global+202752
 ; CHECK-NEXT:    .loc 1 4 1 // tmp.ll:4:1
 ; CHECK-NEXT:    and x8, x8, #0x1ff0
-; CHECK-NEXT:    .loc 1 0 0 // tmp.ll:0:0
-; CHECK-NEXT:    add x9, x9, :lo12:global+202752
 ; CHECK-NEXT:    .loc 1 5 1 is_stmt 1 // tmp.ll:5:1
 ; CHECK-NEXT:    str xzr, [x9, x8]
 ; CHECK-NEXT:    .loc 1 6 1 // tmp.ll:6:1

diff  --git a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
index 4b94de4c7a49b..2139d7043ab22 100644
--- a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
+++ b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
@@ -457,10 +457,10 @@ define void @caller_in_memory() {
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    add x8, sp, #8
 ; CHECK-NEXT:    bl return_in_memory
-; CHECK-NEXT:    adrp x8, in_memory_store
 ; CHECK-NEXT:    ldur q0, [sp, #24]
-; CHECK-NEXT:    ldur q1, [sp, #8]
+; CHECK-NEXT:    adrp x8, in_memory_store
 ; CHECK-NEXT:    add x8, x8, :lo12:in_memory_store
+; CHECK-NEXT:    ldur q1, [sp, #8]
 ; CHECK-NEXT:    ldur q2, [sp, #56]
 ; CHECK-NEXT:    ldur q3, [sp, #40]
 ; CHECK-NEXT:    ldr d4, [sp, #72]
@@ -478,14 +478,14 @@ define void @caller_in_memory() {
 define void @callee_in_memory(%T_IN_MEMORY %a) {
 ; CHECK-LABEL: callee_in_memory:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, in_memory_store
 ; CHECK-NEXT:    ldr d0, [sp, #64]
-; CHECK-NEXT:    ldp q1, q2, [sp, #32]
+; CHECK-NEXT:    adrp x8, in_memory_store
 ; CHECK-NEXT:    add x8, x8, :lo12:in_memory_store
-; CHECK-NEXT:    str d0, [x8, #64]
 ; CHECK-NEXT:    ldr q3, [sp, #16]
-; CHECK-NEXT:    stp q1, q2, [x8, #32]
+; CHECK-NEXT:    ldp q1, q2, [sp, #32]
+; CHECK-NEXT:    str d0, [x8, #64]
 ; CHECK-NEXT:    ldr q0, [sp]
+; CHECK-NEXT:    stp q1, q2, [x8, #32]
 ; CHECK-NEXT:    stp q0, q3, [x8]
 ; CHECK-NEXT:    ret
   store %T_IN_MEMORY %a, %T_IN_MEMORY* @in_memory_store

diff  --git a/llvm/test/CodeGen/AArch64/fold-global-offsets.ll b/llvm/test/CodeGen/AArch64/fold-global-offsets.ll
index aa5435c4fd6b3..c996b07b080a3 100644
--- a/llvm/test/CodeGen/AArch64/fold-global-offsets.ll
+++ b/llvm/test/CodeGen/AArch64/fold-global-offsets.ll
@@ -71,8 +71,8 @@ define [2 x i64] @f4() {
 ;
 ; GISEL-LABEL: f4:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    adrp x9, x2+8
 ; GISEL-NEXT:    adrp x8, x2+8
+; GISEL-NEXT:    adrp x9, x2+8
 ; GISEL-NEXT:    add x9, x9, :lo12:x2+8
 ; GISEL-NEXT:    ldr x0, [x8, :lo12:x2+8]
 ; GISEL-NEXT:    ldr x1, [x9, #8]

diff  --git a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
index 62fc207b0de2a..17db35c529f0a 100644
--- a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
+++ b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
@@ -8,8 +8,8 @@ define void @test1() {
 ; CHECK-LABEL: test1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, x
-; CHECK-NEXT:    adrp x10, y
 ; CHECK-NEXT:    add x8, x8, :lo12:x
+; CHECK-NEXT:    adrp x10, y
 ; CHECK-NEXT:    add x10, x10, :lo12:y
 ; CHECK-NEXT:    ldp x8, x9, [x8]
 ; CHECK-NEXT:    stp x8, x9, [x10]
@@ -23,8 +23,8 @@ define void @test2() {
 ; CHECK-LABEL: test2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, x
-; CHECK-NEXT:    adrp x10, y
 ; CHECK-NEXT:    add x8, x8, :lo12:x
+; CHECK-NEXT:    adrp x10, y
 ; CHECK-NEXT:    add x10, x10, :lo12:y
 ; CHECK-NEXT:    ldp x8, x9, [x8, #504]
 ; CHECK-NEXT:    stp x8, x9, [x10, #504]
@@ -38,10 +38,10 @@ define void @test3() {
 ; CHECK-LABEL: test3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, x
-; CHECK-NEXT:    adrp x10, y
 ; CHECK-NEXT:    add x8, x8, :lo12:x
-; CHECK-NEXT:    add x10, x10, :lo12:y
 ; CHECK-NEXT:    add x8, x8, #512
+; CHECK-NEXT:    adrp x10, y
+; CHECK-NEXT:    add x10, x10, :lo12:y
 ; CHECK-NEXT:    add x10, x10, #512
 ; CHECK-NEXT:    ldp x8, x9, [x8]
 ; CHECK-NEXT:    stp x8, x9, [x10]
@@ -55,8 +55,8 @@ define void @test4() {
 ; CHECK-LABEL: test4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, x
-; CHECK-NEXT:    adrp x10, y
 ; CHECK-NEXT:    add x8, x8, :lo12:x
+; CHECK-NEXT:    adrp x10, y
 ; CHECK-NEXT:    add x10, x10, :lo12:y
 ; CHECK-NEXT:    ldp x8, x9, [x8, #-512]
 ; CHECK-NEXT:    stp x8, x9, [x10, #-512]
@@ -70,10 +70,10 @@ define void @test5() {
 ; CHECK-LABEL: test5:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, x
-; CHECK-NEXT:    adrp x10, y
 ; CHECK-NEXT:    add x8, x8, :lo12:x
-; CHECK-NEXT:    add x10, x10, :lo12:y
 ; CHECK-NEXT:    sub x8, x8, #520
+; CHECK-NEXT:    adrp x10, y
+; CHECK-NEXT:    add x10, x10, :lo12:y
 ; CHECK-NEXT:    sub x10, x10, #520
 ; CHECK-NEXT:    ldp x8, x9, [x8]
 ; CHECK-NEXT:    stp x8, x9, [x10]
@@ -87,10 +87,10 @@ define void @test6() {
 ; CHECK-LABEL: test6:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, x
-; CHECK-NEXT:    adrp x10, y
 ; CHECK-NEXT:    add x8, x8, :lo12:x
-; CHECK-NEXT:    add x10, x10, :lo12:y
 ; CHECK-NEXT:    sub x8, x8, #520
+; CHECK-NEXT:    adrp x10, y
+; CHECK-NEXT:    add x10, x10, :lo12:y
 ; CHECK-NEXT:    sub x10, x10, #520
 ; CHECK-NEXT:    ldp x8, x9, [x8]
 ; CHECK-NEXT:    stp x8, x9, [x10]
@@ -104,10 +104,10 @@ define void @test7() {
 ; CHECK-LABEL: test7:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, x
-; CHECK-NEXT:    adrp x10, y
 ; CHECK-NEXT:    add x8, x8, :lo12:x
-; CHECK-NEXT:    add x10, x10, :lo12:y
 ; CHECK-NEXT:    add x8, x8, #503
+; CHECK-NEXT:    adrp x10, y
+; CHECK-NEXT:    add x10, x10, :lo12:y
 ; CHECK-NEXT:    add x10, x10, #503
 ; CHECK-NEXT:    ldp x8, x9, [x8]
 ; CHECK-NEXT:    stp x8, x9, [x10]

diff  --git a/llvm/test/CodeGen/AArch64/jump-table-32.ll b/llvm/test/CodeGen/AArch64/jump-table-32.ll
index 339a44fc95ac4..d8572e901af29 100644
--- a/llvm/test/CodeGen/AArch64/jump-table-32.ll
+++ b/llvm/test/CodeGen/AArch64/jump-table-32.ll
@@ -9,8 +9,9 @@ define i32 @test_jumptable(i32 %in) {
     i32 2, label %lbl3
     i32 4, label %lbl4
   ]
-; CHECK: adrp    [[JTPAGE:x[0-9]+]], LJTI0_0@PAGE
+
 ; CHECK: mov     w[[INDEX:[0-9]+]], w0
+; CHECK: adrp    [[JTPAGE:x[0-9]+]], LJTI0_0@PAGE
 ; CHECK: add     x[[JT:[0-9]+]], [[JTPAGE]], LJTI0_0@PAGEOFF
 ; CHECK: adr     [[BASE_BLOCK:x[0-9]+]], LBB0_2
 ; CHECK: ldrb    w[[OFFSET:[0-9]+]], [x[[JT]], x[[INDEX]]]

diff  --git a/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll b/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll
index 834cf371ac259..2b03fa34453ee 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll
@@ -55,8 +55,8 @@ entry:
 ; CHECK-LABEL: OUTLINED_FUNCTION_0:
 ; CHECK:      .cfi_startproc
 ; CHECK:        adrp    x1, _ZTIi
-; CHECK-NEXT:   mov     x2, xzr
 ; CHECK-NEXT:   add     x1, x1, :lo12:_ZTIi
+; CHECK-NEXT:   mov     x2, xzr
 ; CHECK-NEXT:   str     w19, [x0]
 ; CHECK-NEXT:   b       __cxa_throw
 ; CHECK:      .cfi_endproc

diff  --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
index d9837c92426d1..bc70e9fe7c9f4 100644
--- a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
@@ -1,5 +1,5 @@
-; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=-fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT
-; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=-fuse-adrp-add,-fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-adrp-add,+fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
 ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57      | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
 ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65      | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
 ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72      | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE

diff  --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
index 587663f261207..feb44beaed8db 100644
--- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
+++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
@@ -23,11 +23,11 @@ define dso_local void @run_test() local_unnamed_addr #0 {
 ; CHECK-NEXT:    .cfi_offset b14, -56
 ; CHECK-NEXT:    .cfi_offset b15, -64
 ; CHECK-NEXT:    movi v14.2d, #0000000000000000
-; CHECK-NEXT:    adrp x10, B+48
-; CHECK-NEXT:    adrp x11, A
 ; CHECK-NEXT:    mov x8, xzr
 ; CHECK-NEXT:    mov x9, xzr
+; CHECK-NEXT:    adrp x10, B+48
 ; CHECK-NEXT:    add x10, x10, :lo12:B+48
+; CHECK-NEXT:    adrp x11, A
 ; CHECK-NEXT:    add x11, x11, :lo12:A
 ; CHECK-NEXT:    // implicit-def: $q2
 ; CHECK-NEXT:    // implicit-def: $q3

diff  --git a/llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll b/llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll
index ba65b5f4d5f46..ecfbb67e8ec96 100644
--- a/llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll
@@ -2,8 +2,8 @@
 
 ; CHECK-LABEL: vls_sve_and_64xi8:
 ; CHECK-NEXT:  adrp    x[[ONE:[0-9]+]], .LCPI0_0
-; CHECK-NEXT:  ptrue   p0.b, vl64
 ; CHECK-NEXT:  add     x[[TWO:[0-9]+]], x[[ONE]], :lo12:.LCPI0_0
+; CHECK-NEXT:  ptrue   p0.b, vl64
 ; CHECK-NEXT:  ld1b    { z0.b }, p0/z, [x0]
 ; CHECK-NEXT:  ld1b    { z1.b }, p0/z, [x[[TWO]]]
 ; CHECK-NEXT:  and     z0.d, z0.d, z1.d

diff  --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
index 31d2404666247..5416f0c976e87 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -578,8 +578,8 @@ define <vscale x 2 x double> @splat_nxv2f64_imm_out_of_range() {
 ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI55_0
-; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    add x8, x8, :lo12:.LCPI55_0
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 2 x double> undef, double 3.33, i32 0


        


More information about the llvm-commits mailing list