[llvm] r304048 - AArch64: Fix cmpxchg O0 expansion
Matthias Braun via llvm-commits
llvm-commits at lists.llvm.org
Fri May 26 16:48:59 PDT 2017
Author: matze
Date: Fri May 26 18:48:59 2017
New Revision: 304048
URL: http://llvm.org/viewvc/llvm-project?rev=304048&view=rev
Log:
AArch64: Fix cmpxchg O0 expansion
- Rewrite livein calculation to use the computeLiveIns() helper
function. This is slightly less efficient but easier to reason about
and doesn't unnecessarily add pristine and reserved registers[1]
- Zero the status register at the beginning of the loop to make sure it
has a defined value.
- Remove kill flags of values that need to stay alive throughout the loop.
[1] An upcoming commit of mine will tighten the MachineVerifier to catch
these.
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
llvm/trunk/test/CodeGen/AArch64/cmpxchg-O0.ll
llvm/trunk/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp?rev=304048&r1=304047&r2=304048&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp Fri May 26 18:48:59 2017
@@ -584,27 +584,21 @@ bool AArch64ExpandPseudo::expandMOVImm(M
return true;
}
-static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
- for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
- MBB->addLiveIn(*I);
-}
-
bool AArch64ExpandPseudo::expandCMP_SWAP(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
- MachineOperand &Dest = MI.getOperand(0);
+ const MachineOperand &Dest = MI.getOperand(0);
unsigned StatusReg = MI.getOperand(1).getReg();
- MachineOperand &Addr = MI.getOperand(2);
- MachineOperand &Desired = MI.getOperand(3);
- MachineOperand &New = MI.getOperand(4);
-
- LivePhysRegs LiveRegs(TII->getRegisterInfo());
- LiveRegs.addLiveOuts(MBB);
- for (auto I = std::prev(MBB.end()); I != MBBI; --I)
- LiveRegs.stepBackward(*I);
+ bool StatusDead = MI.getOperand(1).isDead();
+ // Duplicating undef operands into 2 instructions does not guarantee the same
+ // value on both; However undef should be replaced by xzr anyway.
+ assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned DesiredReg = MI.getOperand(3).getReg();
+ unsigned NewReg = MI.getOperand(4).getReg();
MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@@ -616,19 +610,18 @@ bool AArch64ExpandPseudo::expandCMP_SWAP
MF->insert(++StoreBB->getIterator(), DoneBB);
// .Lloadcmp:
+ // mov wStatus, 0
// ldaxr xDest, [xAddr]
// cmp xDest, xDesired
// b.ne .Ldone
- LoadCmpBB->addLiveIn(Addr.getReg());
- LoadCmpBB->addLiveIn(Dest.getReg());
- LoadCmpBB->addLiveIn(Desired.getReg());
- addPostLoopLiveIns(LoadCmpBB, LiveRegs);
-
+ if (!StatusDead)
+ BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
+ .addImm(0).addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
- .addReg(Addr.getReg());
+ .addReg(AddrReg);
BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
.addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
- .add(Desired)
+ .addReg(DesiredReg)
.addImm(ExtendImm);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
.addImm(AArch64CC::NE)
@@ -640,25 +633,35 @@ bool AArch64ExpandPseudo::expandCMP_SWAP
// .Lstore:
// stlxr wStatus, xNew, [xAddr]
// cbnz wStatus, .Lloadcmp
- StoreBB->addLiveIn(Addr.getReg());
- StoreBB->addLiveIn(New.getReg());
- addPostLoopLiveIns(StoreBB, LiveRegs);
-
- BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg).add(New).add(Addr);
+ BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
+ .addReg(NewReg)
+ .addReg(AddrReg);
BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
- .addReg(StatusReg, RegState::Kill)
+ .addReg(StatusReg, getKillRegState(StatusDead))
.addMBB(LoadCmpBB);
StoreBB->addSuccessor(LoadCmpBB);
StoreBB->addSuccessor(DoneBB);
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
DoneBB->transferSuccessors(&MBB);
- addPostLoopLiveIns(DoneBB, LiveRegs);
MBB.addSuccessor(LoadCmpBB);
NextMBBI = MBB.end();
MI.eraseFromParent();
+
+ // Recompute livein lists.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ LivePhysRegs LiveRegs;
+ computeLiveIns(LiveRegs, MRI, *DoneBB);
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+ // Do an extra pass around the loop to get loop carried registers right.
+ StoreBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ LoadCmpBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+
return true;
}
@@ -671,16 +674,15 @@ bool AArch64ExpandPseudo::expandCMP_SWAP
MachineOperand &DestLo = MI.getOperand(0);
MachineOperand &DestHi = MI.getOperand(1);
unsigned StatusReg = MI.getOperand(2).getReg();
- MachineOperand &Addr = MI.getOperand(3);
- MachineOperand &DesiredLo = MI.getOperand(4);
- MachineOperand &DesiredHi = MI.getOperand(5);
- MachineOperand &NewLo = MI.getOperand(6);
- MachineOperand &NewHi = MI.getOperand(7);
-
- LivePhysRegs LiveRegs(TII->getRegisterInfo());
- LiveRegs.addLiveOuts(MBB);
- for (auto I = std::prev(MBB.end()); I != MBBI; --I)
- LiveRegs.stepBackward(*I);
+ bool StatusDead = MI.getOperand(2).isDead();
+ // Duplicating undef operands into 2 instructions does not guarantee the same
+ // value on both; However undef should be replaced by xzr anyway.
+ assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
+ unsigned AddrReg = MI.getOperand(3).getReg();
+ unsigned DesiredLoReg = MI.getOperand(4).getReg();
+ unsigned DesiredHiReg = MI.getOperand(5).getReg();
+ unsigned NewLoReg = MI.getOperand(6).getReg();
+ unsigned NewHiReg = MI.getOperand(7).getReg();
MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@@ -696,20 +698,13 @@ bool AArch64ExpandPseudo::expandCMP_SWAP
// cmp xDestLo, xDesiredLo
// sbcs xDestHi, xDesiredHi
// b.ne .Ldone
- LoadCmpBB->addLiveIn(Addr.getReg());
- LoadCmpBB->addLiveIn(DestLo.getReg());
- LoadCmpBB->addLiveIn(DestHi.getReg());
- LoadCmpBB->addLiveIn(DesiredLo.getReg());
- LoadCmpBB->addLiveIn(DesiredHi.getReg());
- addPostLoopLiveIns(LoadCmpBB, LiveRegs);
-
BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
.addReg(DestLo.getReg(), RegState::Define)
.addReg(DestHi.getReg(), RegState::Define)
- .addReg(Addr.getReg());
+ .addReg(AddrReg);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
- .add(DesiredLo)
+ .addReg(DesiredLoReg)
.addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
.addUse(AArch64::WZR)
@@ -717,14 +712,14 @@ bool AArch64ExpandPseudo::expandCMP_SWAP
.addImm(AArch64CC::EQ);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
- .add(DesiredHi)
+ .addReg(DesiredHiReg)
.addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
.addUse(StatusReg, RegState::Kill)
.addUse(StatusReg, RegState::Kill)
.addImm(AArch64CC::EQ);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
- .addUse(StatusReg, RegState::Kill)
+ .addUse(StatusReg, getKillRegState(StatusDead))
.addMBB(DoneBB);
LoadCmpBB->addSuccessor(DoneBB);
LoadCmpBB->addSuccessor(StoreBB);
@@ -732,28 +727,36 @@ bool AArch64ExpandPseudo::expandCMP_SWAP
// .Lstore:
// stlxp wStatus, xNewLo, xNewHi, [xAddr]
// cbnz wStatus, .Lloadcmp
- StoreBB->addLiveIn(Addr.getReg());
- StoreBB->addLiveIn(NewLo.getReg());
- StoreBB->addLiveIn(NewHi.getReg());
- addPostLoopLiveIns(StoreBB, LiveRegs);
BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
- .add(NewLo)
- .add(NewHi)
- .add(Addr);
+ .addReg(NewLoReg)
+ .addReg(NewHiReg)
+ .addReg(AddrReg);
BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
- .addReg(StatusReg, RegState::Kill)
+ .addReg(StatusReg, getKillRegState(StatusDead))
.addMBB(LoadCmpBB);
StoreBB->addSuccessor(LoadCmpBB);
StoreBB->addSuccessor(DoneBB);
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
DoneBB->transferSuccessors(&MBB);
- addPostLoopLiveIns(DoneBB, LiveRegs);
MBB.addSuccessor(LoadCmpBB);
NextMBBI = MBB.end();
MI.eraseFromParent();
+
+ // Recompute liveness bottom up.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ LivePhysRegs LiveRegs;
+ computeLiveIns(LiveRegs, MRI, *DoneBB);
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+ // Do an extra pass in the loop to get the loop carried dependencies right.
+ StoreBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ LoadCmpBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+
return true;
}
Modified: llvm/trunk/test/CodeGen/AArch64/cmpxchg-O0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/cmpxchg-O0.ll?rev=304048&r1=304047&r2=304048&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/cmpxchg-O0.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/cmpxchg-O0.ll Fri May 26 18:48:59 2017
@@ -3,10 +3,11 @@
define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_8:
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: mov [[STATUS:w[3-9]+]], #0
; CHECK: ldaxrb [[OLD:w[0-9]+]], [x0]
; CHECK: cmp [[OLD]], w1, uxtb
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxrb [[STATUS:w[3-9]]], w2, [x0]
+; CHECK: stlxrb [[STATUS]], w2, [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
@@ -18,6 +19,7 @@ define { i8, i1 } @test_cmpxchg_8(i8* %a
define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_16:
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: mov [[STATUS:w[3-9]+]], #0
; CHECK: ldaxrh [[OLD:w[0-9]+]], [x0]
; CHECK: cmp [[OLD]], w1, uxth
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
@@ -33,10 +35,11 @@ define { i16, i1 } @test_cmpxchg_16(i16*
define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_32:
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: mov [[STATUS:w[3-9]+]], #0
; CHECK: ldaxr [[OLD:w[0-9]+]], [x0]
; CHECK: cmp [[OLD]], w1
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxr [[STATUS:w[3-9]]], w2, [x0]
+; CHECK: stlxr [[STATUS]], w2, [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
@@ -48,10 +51,11 @@ define { i32, i1 } @test_cmpxchg_32(i32*
define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_64:
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: mov [[STATUS:w[3-9]+]], #0
; CHECK: ldaxr [[OLD:x[0-9]+]], [x0]
; CHECK: cmp [[OLD]], x1
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxr [[STATUS:w[3-9]]], x2, [x0]
+; CHECK: stlxr [[STATUS]], x2, [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: subs {{x[0-9]+}}, [[OLD]], x1
Modified: llvm/trunk/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fast-isel-cmpxchg.ll?rev=304048&r1=304047&r2=304048&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fast-isel-cmpxchg.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fast-isel-cmpxchg.ll Fri May 26 18:48:59 2017
@@ -2,11 +2,12 @@
; CHECK-LABEL: cmpxchg_monotonic_32:
; CHECK: [[RETRY:.LBB[0-9_]+]]:
+; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0
; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0]
; CHECK-NEXT: cmp [[OLD]], w1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // BB#2:
-; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w2, [x0]
+; CHECK-NEXT: stlxr [[STATUS]], w2, [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: cmp [[OLD]], w1
@@ -27,11 +28,12 @@ define i32 @cmpxchg_monotonic_32(i32* %p
; CHECK: // BB#0:
; CHECK: ldr [[NEW:w[0-9]+]], [x2]
; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]:
+; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0
; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0]
; CHECK-NEXT: cmp [[OLD]], w1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // BB#2:
-; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x0]
+; CHECK-NEXT: stlxr [[STATUS]], [[NEW]], [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: cmp [[OLD]], w1
@@ -51,11 +53,12 @@ define i32 @cmpxchg_acq_rel_32_load(i32*
; CHECK-LABEL: cmpxchg_seq_cst_64:
; CHECK: [[RETRY:.LBB[0-9_]+]]:
+; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0
; CHECK-NEXT: ldaxr [[OLD:x[0-9]+]], [x0]
; CHECK-NEXT: cmp [[OLD]], x1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // BB#2:
-; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], x2, [x0]
+; CHECK-NEXT: stlxr [[STATUS]], x2, [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: cmp [[OLD]], x1
More information about the llvm-commits
mailing list