[llvm] 572fc7d - [AArch64] Order STP Q's by ascending address
Andre Vieira via llvm-commits
llvm-commits at lists.llvm.org
Mon May 23 01:51:07 PDT 2022
Author: Andre Vieira
Date: 2022-05-23T09:50:44+01:00
New Revision: 572fc7d2fd14b768b58e697015b18c46cafb421c
URL: https://github.com/llvm/llvm-project/commit/572fc7d2fd14b768b58e697015b18c46cafb421c
DIFF: https://github.com/llvm/llvm-project/commit/572fc7d2fd14b768b58e697015b18c46cafb421c.diff
LOG: [AArch64] Order STP Q's by ascending address
This patch adds an AArch64-specific post-RA MachineScheduler strategy that tries to
schedule STP Q's to the same base address in ascending order of offsets. We have
found this to improve performance on Neoverse N1, and it should not hurt other
AArch64 cores.
Differential Revision: https://reviews.llvm.org/D125377
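For reference, the heuristic amounts to: when two candidate stores are STPQi's
to the same base register and their writes cannot overlap, prefer the one with
the smaller immediate offset. The stand-alone C++ sketch below illustrates the
comparison in isolation; it is not part of the patch, and the assumption that
STPQi immediates are scaled in 16-byte units is made for illustration only.

  #include <cstdint>
  #include <cstdlib>

  struct StoreCand {
    unsigned Opcode;  // instruction opcode, e.g. AArch64::STPQi
    unsigned BaseReg; // base-address register
    int64_t Imm;      // immediate offset; assumed scaled in 16-byte units
  };

  // Returns true if TryCand should be preferred over Cand: when both are STP
  // Q's to the same base and their offsets differ by at least 2 units (32
  // bytes, so the paired stores cannot overlap), pick the lower offset so the
  // stores issue in ascending address order.
  static bool preferAscendingStpQ(const StoreCand &TryCand,
                                  const StoreCand &Cand, unsigned STPQiOpcode) {
    if (TryCand.Opcode == STPQiOpcode && Cand.Opcode == STPQiOpcode &&
        TryCand.BaseReg == Cand.BaseReg &&
        std::llabs(TryCand.Imm - Cand.Imm) >= 2)
      return TryCand.Imm < Cand.Imm;
    return false; // otherwise defer to the generic post-RA heuristics
  }

  int main() {
    StoreCand A{/*Opcode=*/1, /*BaseReg=*/31, /*Imm=*/2};
    StoreCand B{/*Opcode=*/1, /*BaseReg=*/31, /*Imm=*/0};
    return preferAscendingStpQ(B, A, /*STPQiOpcode=*/1) ? 0 : 1; // prefers B
  }
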
Added:
llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
llvm/lib/Target/AArch64/AArch64MachineScheduler.h
Modified:
llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
llvm/lib/Target/AArch64/CMakeLists.txt
llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp b/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
new file mode 100644
index 0000000000000..ff15c0b07aa42
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
@@ -0,0 +1,39 @@
+//===- AArch64MachineScheduler.cpp - MI Scheduler for AArch64 -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MachineScheduler.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+
+using namespace llvm;
+
+bool AArch64PostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand) {
+ bool OriginalResult = PostGenericScheduler::tryCandidate(Cand, TryCand);
+
+ if (Cand.isValid()) {
+ MachineInstr *Instr0 = TryCand.SU->getInstr();
+ MachineInstr *Instr1 = Cand.SU->getInstr();
+ // When dealing with two STPqi's.
+ if (Instr0 && Instr1 && Instr0->getOpcode() == Instr1->getOpcode () &&
+ Instr0->getOpcode() == AArch64::STPQi)
+ {
+ MachineOperand &Base0 = Instr0->getOperand(2);
+ MachineOperand &Base1 = Instr1->getOperand(2);
+ int64_t Off0 = Instr0->getOperand(3).getImm();
+ int64_t Off1 = Instr1->getOperand(3).getImm();
+ // With the same base address and non-overlapping writes.
+ if (Base0.isIdenticalTo(Base1) && llabs (Off0 - Off1) >= 2) {
+ TryCand.Reason = NodeOrder;
+ // Order them by ascending offsets.
+ return Off0 < Off1;
+ }
+ }
+ }
+
+ return OriginalResult;
+}
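
A note on the code above: for STPQi, operand 2 is the base-address operand and
operand 3 the immediate offset (as read by the new tryCandidate), and the
immediate appears to be scaled by the 16-byte transfer size, so offsets that
differ by at least 2 correspond to stores at least 32 bytes apart, i.e. the two
32-byte paired writes cannot overlap. tryCandidate returns true when TryCand is
to be preferred over Cand, so returning Off0 < Off1 picks the lower offset
first, yielding ascending addresses.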
diff --git a/llvm/lib/Target/AArch64/AArch64MachineScheduler.h b/llvm/lib/Target/AArch64/AArch64MachineScheduler.h
new file mode 100644
index 0000000000000..23df015986d10
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64MachineScheduler.h
@@ -0,0 +1,33 @@
+//===- AArch64MachineScheduler.h - Custom AArch64 MI scheduler --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom AArch64 MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINESCHEDULER_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// A MachineSchedStrategy implementation for AArch64 post RA scheduling.
+class AArch64PostRASchedStrategy : public PostGenericScheduler {
+public:
+ AArch64PostRASchedStrategy(const MachineSchedContext *C) :
+ PostGenericScheduler(C) {}
+
+protected:
+ bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override;
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 2b73cd2aa4127..bcfc350805547 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -12,6 +12,7 @@
#include "AArch64TargetMachine.h"
#include "AArch64.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64MachineScheduler.h"
#include "AArch64MacroFusion.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetObjectFile.h"
@@ -474,15 +475,17 @@ class AArch64PassConfig : public TargetPassConfig {
ScheduleDAGInstrs *
createPostMachineScheduler(MachineSchedContext *C) const override {
const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
+ ScheduleDAGMI *DAG =
+ new ScheduleDAGMI(C, std::make_unique<AArch64PostRASchedStrategy>(C),
+ /* RemoveKillFlags=*/true);
if (ST.hasFusion()) {
// Run the Macro Fusion after RA again since literals are expanded from
// pseudos then (v. addPreSched2()).
- ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
DAG->addMutation(createAArch64MacroFusionDAGMutation());
return DAG;
}
- return nullptr;
+ return DAG;
}
void addIRPasses() override;
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index aeedeb4eebac8..ca7d53dce2bb0 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -65,6 +65,7 @@ add_llvm_target(AArch64CodeGen
AArch64LoadStoreOptimizer.cpp
AArch64LowerHomogeneousPrologEpilog.cpp
AArch64MachineFunctionInfo.cpp
+ AArch64MachineScheduler.cpp
AArch64MacroFusion.cpp
AArch64MIPeepholeOpt.cpp
AArch64MCInstLower.cpp
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
index 8cb0d5401b7de..2339157f5736c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
@@ -62,11 +62,11 @@ define i32 @test_musttail_variadic_spill(i32 %arg0, ...) {
; CHECK-NEXT: mov x24, x5
; CHECK-NEXT: mov x25, x6
; CHECK-NEXT: mov x26, x7
-; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill
+; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill
; CHECK-NEXT: mov x27, x8
-; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill
; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill
-; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill
+; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill
+; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill
; CHECK-NEXT: bl _puts
; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload
; CHECK-NEXT: mov w0, w19
@@ -132,11 +132,11 @@ define void @f_thunk(i8* %this, ...) {
; CHECK-NEXT: mov x24, x5
; CHECK-NEXT: mov x25, x6
; CHECK-NEXT: mov x26, x7
-; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill
+; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill
; CHECK-NEXT: mov x27, x8
-; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill
; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill
-; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill
+; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill
+; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill
; CHECK-NEXT: str x10, [x9]
; CHECK-NEXT: bl _get_f
; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
index 2139d7043ab22..0b11b1555fb88 100644
--- a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
+++ b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
@@ -53,8 +53,8 @@ define [ 9 x double ] @array_9() {
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: str xzr, [x8, #64]
-; CHECK-NEXT: stp q0, q0, [x8, #32]
; CHECK-NEXT: stp q0, q0, [x8]
+; CHECK-NEXT: stp q0, q0, [x8, #32]
; CHECK-NEXT: ret
ret [ 9 x double ] zeroinitializer
}
@@ -232,8 +232,8 @@ define [ 5 x %T_STRUCT_SAMEM ] @array_of_struct_in_memory() {
; CHECK-LABEL: array_of_struct_in_memory:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: stp q0, q0, [x8, #48]
; CHECK-NEXT: stp q0, q0, [x8, #16]
+; CHECK-NEXT: stp q0, q0, [x8, #48]
; CHECK-NEXT: str q0, [x8]
; CHECK-NEXT: ret
ret [ 5 x %T_STRUCT_SAMEM ] zeroinitializer
@@ -350,8 +350,8 @@ define [ 2 x %T_NESTED_STRUCT_SAMEM ] @array_of_struct_nested_same_field_types_2
; CHECK-LABEL: array_of_struct_nested_same_field_types_2:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: stp q0, q0, [x8, #48]
; CHECK-NEXT: stp q0, q0, [x8, #16]
+; CHECK-NEXT: stp q0, q0, [x8, #48]
; CHECK-NEXT: str q0, [x8]
; CHECK-NEXT: ret
ret [ 2 x %T_NESTED_STRUCT_SAMEM ] zeroinitializer
@@ -440,8 +440,8 @@ define %T_IN_MEMORY @return_in_memory() {
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: str xzr, [x8, #64]
-; CHECK-NEXT: stp q0, q0, [x8, #32]
; CHECK-NEXT: stp q0, q0, [x8]
+; CHECK-NEXT: stp q0, q0, [x8, #32]
; CHECK-NEXT: ret
ret %T_IN_MEMORY zeroinitializer
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
index 1d59193f4d915..1fe87a3fb5a04 100644
--- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
@@ -52,8 +52,8 @@ define void @bzero_64_heap(i8* nocapture %c) {
; CHECK-LABEL: bzero_64_heap:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: stp q0, q0, [x0, #32]
; CHECK-NEXT: stp q0, q0, [x0]
+; CHECK-NEXT: stp q0, q0, [x0, #32]
; CHECK-NEXT: ret
call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 64, i1 false)
ret void
@@ -230,8 +230,8 @@ define void @bzero_64_stack() {
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov x0, sp
-; CHECK-NEXT: stp q0, q0, [sp, #32]
; CHECK-NEXT: stp q0, q0, [sp]
+; CHECK-NEXT: stp q0, q0, [sp, #32]
; CHECK-NEXT: bl something
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
@@ -253,8 +253,8 @@ define void @bzero_72_stack() {
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov x0, sp
; CHECK-NEXT: str xzr, [sp, #64]
-; CHECK-NEXT: stp q0, q0, [sp, #32]
; CHECK-NEXT: stp q0, q0, [sp]
+; CHECK-NEXT: stp q0, q0, [sp, #32]
; CHECK-NEXT: bl something
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #96
@@ -275,10 +275,10 @@ define void @bzero_128_stack() {
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov x0, sp
-; CHECK-NEXT: stp q0, q0, [sp, #96]
-; CHECK-NEXT: stp q0, q0, [sp, #64]
-; CHECK-NEXT: stp q0, q0, [sp, #32]
; CHECK-NEXT: stp q0, q0, [sp]
+; CHECK-NEXT: stp q0, q0, [sp, #32]
+; CHECK-NEXT: stp q0, q0, [sp, #64]
+; CHECK-NEXT: stp q0, q0, [sp, #96]
; CHECK-NEXT: bl something
; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #144
@@ -300,14 +300,14 @@ define void @bzero_256_stack() {
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov x0, sp
-; CHECK-NEXT: stp q0, q0, [sp, #224]
-; CHECK-NEXT: stp q0, q0, [sp, #192]
-; CHECK-NEXT: stp q0, q0, [sp, #160]
-; CHECK-NEXT: stp q0, q0, [sp, #128]
-; CHECK-NEXT: stp q0, q0, [sp, #96]
-; CHECK-NEXT: stp q0, q0, [sp, #64]
-; CHECK-NEXT: stp q0, q0, [sp, #32]
; CHECK-NEXT: stp q0, q0, [sp]
+; CHECK-NEXT: stp q0, q0, [sp, #32]
+; CHECK-NEXT: stp q0, q0, [sp, #64]
+; CHECK-NEXT: stp q0, q0, [sp, #96]
+; CHECK-NEXT: stp q0, q0, [sp, #128]
+; CHECK-NEXT: stp q0, q0, [sp, #160]
+; CHECK-NEXT: stp q0, q0, [sp, #192]
+; CHECK-NEXT: stp q0, q0, [sp, #224]
; CHECK-NEXT: bl something
; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #272
@@ -497,8 +497,8 @@ define void @memset_64_stack() {
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v0.16b, #170
; CHECK-NEXT: mov x0, sp
-; CHECK-NEXT: stp q0, q0, [sp, #32]
; CHECK-NEXT: stp q0, q0, [sp]
+; CHECK-NEXT: stp q0, q0, [sp, #32]
; CHECK-NEXT: bl something
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
@@ -521,8 +521,8 @@ define void @memset_72_stack() {
; CHECK-NEXT: mov x8, #-6148914691236517206
; CHECK-NEXT: mov x0, sp
; CHECK-NEXT: str x8, [sp, #64]
-; CHECK-NEXT: stp q0, q0, [sp, #32]
; CHECK-NEXT: stp q0, q0, [sp]
+; CHECK-NEXT: stp q0, q0, [sp, #32]
; CHECK-NEXT: bl something
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #96
@@ -543,10 +543,10 @@ define void @memset_128_stack() {
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v0.16b, #170
; CHECK-NEXT: mov x0, sp
-; CHECK-NEXT: stp q0, q0, [sp, #96]
-; CHECK-NEXT: stp q0, q0, [sp, #64]
-; CHECK-NEXT: stp q0, q0, [sp, #32]
; CHECK-NEXT: stp q0, q0, [sp]
+; CHECK-NEXT: stp q0, q0, [sp, #32]
+; CHECK-NEXT: stp q0, q0, [sp, #64]
+; CHECK-NEXT: stp q0, q0, [sp, #96]
; CHECK-NEXT: bl something
; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #144
@@ -568,14 +568,14 @@ define void @memset_256_stack() {
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: movi v0.16b, #170
; CHECK-NEXT: mov x0, sp
-; CHECK-NEXT: stp q0, q0, [sp, #224]
-; CHECK-NEXT: stp q0, q0, [sp, #192]
-; CHECK-NEXT: stp q0, q0, [sp, #160]
-; CHECK-NEXT: stp q0, q0, [sp, #128]
-; CHECK-NEXT: stp q0, q0, [sp, #96]
-; CHECK-NEXT: stp q0, q0, [sp, #64]
-; CHECK-NEXT: stp q0, q0, [sp, #32]
; CHECK-NEXT: stp q0, q0, [sp]
+; CHECK-NEXT: stp q0, q0, [sp, #32]
+; CHECK-NEXT: stp q0, q0, [sp, #64]
+; CHECK-NEXT: stp q0, q0, [sp, #96]
+; CHECK-NEXT: stp q0, q0, [sp, #128]
+; CHECK-NEXT: stp q0, q0, [sp, #160]
+; CHECK-NEXT: stp q0, q0, [sp, #192]
+; CHECK-NEXT: stp q0, q0, [sp, #224]
; CHECK-NEXT: bl something
; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #272
diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
index b2c68148d0e56..7570b8b03b884 100644
--- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
+++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
@@ -152,15 +152,11 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: stp q13, q12, [x8]
; CHECK-NEXT: stp q11, q10, [x8, #32]
; CHECK-NEXT: stp q9, q8, [x8, #64]
-; CHECK-NEXT: stp q4, q15, [x8, #432]
-; CHECK-NEXT: stp q14, q3, [x8, #464]
-; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: stp q31, q30, [x8, #96]
; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: stp q29, q28, [x8, #144]
+; CHECK-NEXT: stp q31, q30, [x8, #96]
; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: stp q29, q28, [x8, #144]
; CHECK-NEXT: stp q27, q26, [x8, #176]
-; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: str q25, [x8, #208]
; CHECK-NEXT: stp q24, q23, [x8, #240]
; CHECK-NEXT: stp q22, q21, [x8, #272]
@@ -168,7 +164,11 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: stp q18, q17, [x8, #336]
; CHECK-NEXT: stp q16, q7, [x8, #368]
; CHECK-NEXT: stp q6, q5, [x8, #400]
+; CHECK-NEXT: stp q4, q15, [x8, #432]
+; CHECK-NEXT: stp q14, q3, [x8, #464]
+; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: str q2, [x8, #496]
+; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore b8
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
index f863eee2b8654..3539fcbf28b76 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
@@ -249,9 +249,9 @@ define void @select_v128f16(<128 x half>* %a, <128 x half>* %b, i1 %mask) #0 {
; NO_SVE-NEXT: ldr q26, [x1, #32]
; NO_SVE-NEXT: ldr q27, [x1, #16]
; NO_SVE-NEXT: ldr q11, [x1]
+; NO_SVE-NEXT: stp q3, q2, [x0, #192]
; NO_SVE-NEXT: stp q1, q0, [x0, #224]
; NO_SVE-NEXT: mov v0.16b, v8.16b
-; NO_SVE-NEXT: stp q3, q2, [x0, #192]
; NO_SVE-NEXT: mov v1.16b, v8.16b
; NO_SVE-NEXT: mov v2.16b, v8.16b
; NO_SVE-NEXT: bsl v0.16b, v5.16b, v29.16b
@@ -531,9 +531,9 @@ define void @select_v64f32(<64 x float>* %a, <64 x float>* %b, i1 %mask) #0 {
; NO_SVE-NEXT: ldr q26, [x1, #32]
; NO_SVE-NEXT: ldr q27, [x1, #16]
; NO_SVE-NEXT: ldr q11, [x1]
+; NO_SVE-NEXT: stp q3, q2, [x0, #192]
; NO_SVE-NEXT: stp q1, q0, [x0, #224]
; NO_SVE-NEXT: mov v0.16b, v8.16b
-; NO_SVE-NEXT: stp q3, q2, [x0, #192]
; NO_SVE-NEXT: mov v1.16b, v8.16b
; NO_SVE-NEXT: mov v2.16b, v8.16b
; NO_SVE-NEXT: bsl v0.16b, v5.16b, v29.16b
@@ -813,9 +813,9 @@ define void @select_v32f64(<32 x double>* %a, <32 x double>* %b, i1 %mask) #0 {
; NO_SVE-NEXT: ldr q26, [x1, #32]
; NO_SVE-NEXT: ldr q27, [x1, #16]
; NO_SVE-NEXT: ldr q11, [x1]
+; NO_SVE-NEXT: stp q3, q2, [x0, #192]
; NO_SVE-NEXT: stp q1, q0, [x0, #224]
; NO_SVE-NEXT: mov v0.16b, v8.16b
-; NO_SVE-NEXT: stp q3, q2, [x0, #192]
; NO_SVE-NEXT: mov v1.16b, v8.16b
; NO_SVE-NEXT: mov v2.16b, v8.16b
; NO_SVE-NEXT: bsl v0.16b, v5.16b, v29.16b
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
index 57511fe983feb..76b564822c76a 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
@@ -131,6 +131,7 @@ static_library("LLVMAArch64CodeGen") {
"AArch64MCInstLower.cpp",
"AArch64MIPeepholeOpt.cpp",
"AArch64MachineFunctionInfo.cpp",
+ "AArch64MachineScheduler.cpp",
"AArch64MacroFusion.cpp",
"AArch64PBQPRegAlloc.cpp",
"AArch64PromoteConstant.cpp",