[llvm] [AArch64] Fix tryMergeAdjacentSTG function in PrologEpilog pass (PR #68873)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 9 22:44:15 PST 2023


https://github.com/kartcq updated https://github.com/llvm/llvm-project/pull/68873

>From 33218eeeb408bfc644c755ce3508d20afbaa35ae Mon Sep 17 00:00:00 2001
From: kartcq <quic_kartc at quicinc.com>
Date: Thu, 12 Oct 2023 03:27:56 -0700
Subject: [PATCH 1/5] [AArch64] Fix tryMergeAdjacentSTG function in
 PrologEpilog pass

The tryMergeAdjacentSTG function tries to merge multiple
stg/st2g/stg_loop instructions. It doesn't verify the liveness of NZCV
flag before moving around STGloop which also alters NZCV flags. This
was not issue before the patch 5e612bc as these stack tag stores does
not alter the NZCV flags. But after the change, this merge function
leads to miscompilation because of control flow change in instructions.
Added the check to to see if the first instruction after insert point
reads or writes to NZCV flag and it's liveout state. This check happens
after the filling of merge list just before merge and bails out if
necessary.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 38 ++++++++++
 llvm/test/CodeGen/AArch64/settag-merge.ll     | 69 +++++++++++++++++++
 2 files changed, 107 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 78f85faaf69bf96..3adf005243cd8a2 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -3703,6 +3703,17 @@ bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
   return true;
 }
 
+bool isNZCVLiveOut(MachineBasicBlock &MBB) {
+  // Loop over all of the successors of the basic block, checking to see if
+  // the value is either live in the block, or if it is killed in the block.
+  // We are checking for LiveIns alone
+  for (MachineBasicBlock *SuccMBB : MBB.successors()) {
+    if (SuccMBB->isLiveIn(AArch64::NZCV))
+      return true;
+  }
+  return false;
+}
+
 // Detect a run of memory tagging instructions for adjacent stack frame slots,
 // and replace them with a shorter instruction sequence:
 // * replace STG + STG with ST2G
@@ -3763,6 +3774,33 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
   MachineBasicBlock::iterator InsertI = Instrs.back().MI;
   InsertI++;
 
+  // All the gathered stack tag instructions are merged and placed after
+  // last tag store in the list. The check should be made if the nzcv
+  // flag that we might be overriding will affect upcoming instructions
+  // that reads the flag(Here NZCV) or if the register is live out. If
+  // the flag is defined again we don't have this problem as the flag
+  // is anyway subject to modification.
+  // Though there could be different ways to curb this maltransformation,
+  // this seems to be simple and straight forward without changing
+  // existing algorithm.
+  // FIXME : This approach of bailing out from merge is also conservative
+  // in some ways like even if stg loops are not present after merge
+  // these checks are done (which are not needed).
+  bool modifiesFirst = false, readsFirst = false;
+  for (MachineBasicBlock::iterator I = InsertI, E = MBB->end(); I != E; I++) {
+    MachineInstr &MI = *I;
+    if (MI.readsRegister(AArch64::NZCV)) {
+      readsFirst = true;
+      break;
+    }
+    if (MI.modifiesRegister(AArch64::NZCV)) {
+      modifiesFirst = true;
+      break;
+    }
+  }
+  if (!modifiesFirst && (readsFirst || isNZCVLiveOut(*MBB)))
+    return InsertI;
+
   llvm::stable_sort(Instrs,
                     [](const TagStoreInstr &Left, const TagStoreInstr &Right) {
                       return Left.Offset < Right.Offset;
diff --git a/llvm/test/CodeGen/AArch64/settag-merge.ll b/llvm/test/CodeGen/AArch64/settag-merge.ll
index 0c00931a1fd0c74..1f56368c41df75c 100644
--- a/llvm/test/CodeGen/AArch64/settag-merge.ll
+++ b/llvm/test/CodeGen/AArch64/settag-merge.ll
@@ -289,3 +289,72 @@ entry:
   call void @llvm.aarch64.settag(ptr %c2, i64 128)
   ret void
 }
+
+; Function Attrs: nounwind
+declare i32 @printf(ptr, ...) #0
+
+ at .str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Case 1
+; Insert point of stg merge is followed by nzcv read
+; Don't merge in this case
+
+define i32 @stg_no_merge(i32 %in) {
+entry:
+; CHECK-LABEL: stg_no_merge:
+; CHECK: stg sp, [sp, #528]
+; CHECK-NEXT: .LBB10_1:
+; CHECK: st2g x9, [x9], #32
+; CHECK-NEXT: subs x8, x8, #32
+; CHECK-NEXT: b.ne .LBB10_1
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: cmp w0, #10
+; CHECK-NEXT: stg sp, [sp]
+; CHECK-NEXT: b.ge .LBB10_4
+
+  %a = alloca i8, i32 16, align 16
+  %b = alloca i8, i32 512, align 16
+  %c = alloca i8, i32 16, align 16
+  call void @llvm.aarch64.settag(ptr %a, i64 16)
+  call void @llvm.aarch64.settag(ptr %b, i64 512)
+  %cmp = icmp slt i32 %in, 10
+  call void @llvm.aarch64.settag(ptr %c, i64 16)
+  br i1 %cmp, label %return0, label %return1
+
+return0:                                           ; preds = %entry
+  %call = call i32 (ptr, ...) @printf(ptr @.str, i32 10) #1
+  ret i32 0
+
+return1:
+  ret i32 1
+}
+
+; Case 2
+; Insert point of stg merge is not followed by nzcv read
+; Merge in this case
+
+define i32 @stg_merge(i32 %in) {
+entry:
+; CHECK-LABEL: stg_merge:
+; CHECK: mov x8, #544
+; CHECK-NEXT: .LBB11_1:
+; CHECK: st2g sp, [sp], #32
+; CHECK-NEXT: subs x8, x8, #32
+; CHECK-NEXT: b.ne .LBB11_1
+
+
+  %a = alloca i8, i32 16, align 16
+  %b = alloca i8, i32 512, align 16
+  %c = alloca i8, i32 16, align 16
+  call void @llvm.aarch64.settag(ptr %a, i64 16)
+  call void @llvm.aarch64.settag(ptr %b, i64 512)
+  call void @llvm.aarch64.settag(ptr %c, i64 16)
+  br label %return1
+
+return0:                                           ; preds = %entry
+  %call = call i32 (ptr, ...) @printf(ptr @.str, i32 10) #1
+  ret i32 0
+
+return1:
+  ret i32 1
+}

>From 9350a2bcb510720ac7f12e8491c5ed76e2cbf181 Mon Sep 17 00:00:00 2001
From: kartcq <99465331+kartcq at users.noreply.github.com>
Date: Tue, 7 Nov 2023 12:13:03 +0530
Subject: [PATCH 2/5] Update AArch64FrameLowering.cpp

Checking liveness of NZCV flag using more appropriate APIs.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 45 ++++++-------------
 1 file changed, 13 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 3adf005243cd8a2..260f89dfd260f35 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -3703,17 +3703,6 @@ bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
   return true;
 }
 
-bool isNZCVLiveOut(MachineBasicBlock &MBB) {
-  // Loop over all of the successors of the basic block, checking to see if
-  // the value is either live in the block, or if it is killed in the block.
-  // We are checking for LiveIns alone
-  for (MachineBasicBlock *SuccMBB : MBB.successors()) {
-    if (SuccMBB->isLiveIn(AArch64::NZCV))
-      return true;
-  }
-  return false;
-}
-
 // Detect a run of memory tagging instructions for adjacent stack frame slots,
 // and replace them with a shorter instruction sequence:
 // * replace STG + STG with ST2G
@@ -3772,33 +3761,25 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
 
   // New code will be inserted after the last tagging instruction we've found.
   MachineBasicBlock::iterator InsertI = Instrs.back().MI;
-  InsertI++;
 
   // All the gathered stack tag instructions are merged and placed after
   // last tag store in the list. The check should be made if the nzcv
-  // flag that we might be overriding will affect upcoming instructions
-  // that reads the flag(Here NZCV) or if the register is live out. If
-  // the flag is defined again we don't have this problem as the flag
-  // is anyway subject to modification.
-  // Though there could be different ways to curb this maltransformation,
-  // this seems to be simple and straight forward without changing
-  // existing algorithm.
+  // flag is live at the point where we are trying to insert. Otherwise
+  // the nzcv flag might get clobbered if any stg loops are present.
+
   // FIXME : This approach of bailing out from merge is also conservative
-  // in some ways like even if stg loops are not present after merge
-  // these checks are done (which are not needed).
-  bool modifiesFirst = false, readsFirst = false;
-  for (MachineBasicBlock::iterator I = InsertI, E = MBB->end(); I != E; I++) {
-    MachineInstr &MI = *I;
-    if (MI.readsRegister(AArch64::NZCV)) {
-      readsFirst = true;
-      break;
-    }
-    if (MI.modifiesRegister(AArch64::NZCV)) {
-      modifiesFirst = true;
+  // in some ways like even if stg loops are not present after merge in
+  // the insert list, this liveness check is done (which is not needed).
+  LivePhysRegs LR(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
+  LR.addLiveOuts(*MBB);
+  for (auto I = MBB->rbegin(); ; ++I) {
+    MachineInstr &MIns = *I;
+    if (MIns == InsertI)
       break;
-    }
+    LR.stepBackward(*I);
   }
-  if (!modifiesFirst && (readsFirst || isNZCVLiveOut(*MBB)))
+  InsertI++;
+  if (LR.contains(AArch64::NZCV))
     return InsertI;
 
   llvm::stable_sort(Instrs,

>From 62ae4d1e8b483636f4a460948be29b8fe7fb2aec Mon Sep 17 00:00:00 2001
From: kartcq <99465331+kartcq at users.noreply.github.com>
Date: Tue, 7 Nov 2023 12:14:50 +0530
Subject: [PATCH 3/5] Update settag-merge.ll

---
 llvm/test/CodeGen/AArch64/settag-merge.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/settag-merge.ll b/llvm/test/CodeGen/AArch64/settag-merge.ll
index 1f56368c41df75c..af922b91b221a86 100644
--- a/llvm/test/CodeGen/AArch64/settag-merge.ll
+++ b/llvm/test/CodeGen/AArch64/settag-merge.ll
@@ -299,9 +299,9 @@ declare i32 @printf(ptr, ...) #0
 ; Insert point of stg merge is followed by nzcv read
 ; Don't merge in this case
 
-define i32 @stg_no_merge(i32 %in) {
+define i32 @nzcv_clobber(i32 %in) {
 entry:
-; CHECK-LABEL: stg_no_merge:
+; CHECK-LABEL: nzcv_clobber:
 ; CHECK: stg sp, [sp, #528]
 ; CHECK-NEXT: .LBB10_1:
 ; CHECK: st2g x9, [x9], #32
@@ -333,9 +333,9 @@ return1:
 ; Insert point of stg merge is not followed by nzcv read
 ; Merge in this case
 
-define i32 @stg_merge(i32 %in) {
+define i32 @nzcv_no_clobber(i32 %in) {
 entry:
-; CHECK-LABEL: stg_merge:
+; CHECK-LABEL: nzcv_no_clobber:
 ; CHECK: mov x8, #544
 ; CHECK-NEXT: .LBB11_1:
 ; CHECK: st2g sp, [sp], #32

>From 9f135da0960a6a5e15d6606bf2c758215aedc991 Mon Sep 17 00:00:00 2001
From: kartcq <99465331+kartcq at users.noreply.github.com>
Date: Tue, 7 Nov 2023 13:20:39 +0530
Subject: [PATCH 4/5] Format AArch64FrameLowering.cpp

---
 llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 260f89dfd260f35..404fcafb5046792 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -3772,7 +3772,7 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
   // the insert list, this liveness check is done (which is not needed).
   LivePhysRegs LR(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
   LR.addLiveOuts(*MBB);
-  for (auto I = MBB->rbegin(); ; ++I) {
+  for (auto I = MBB->rbegin();; ++I) {
     MachineInstr &MIns = *I;
     if (MIns == InsertI)
       break;

>From 3c0588806cb3c9ae08afd3750a2e7a41f7ec4dbc Mon Sep 17 00:00:00 2001
From: kartcq <99465331+kartcq at users.noreply.github.com>
Date: Fri, 10 Nov 2023 12:14:02 +0530
Subject: [PATCH 5/5] Update AArch64FrameLowering.cpp

---
 .../Target/AArch64/AArch64FrameLowering.cpp    | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 404fcafb5046792..86950c7957f38a3 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -3767,19 +3767,19 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
   // flag is live at the point where we are trying to insert. Otherwise
   // the nzcv flag might get clobbered if any stg loops are present.
 
-  // FIXME : This approach of bailing out from merge is also conservative
-  // in some ways like even if stg loops are not present after merge in
-  // the insert list, this liveness check is done (which is not needed).
-  LivePhysRegs LR(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
-  LR.addLiveOuts(*MBB);
+  // FIXME : This approach of bailing out from merge is conservative in
+  // some ways like even if stg loops are not present after merge the
+  // insert list, this liveness check is done (which is not needed).
+  LivePhysRegs LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
+  LiveRegs.addLiveOuts(*MBB);
   for (auto I = MBB->rbegin();; ++I) {
-    MachineInstr &MIns = *I;
-    if (MIns == InsertI)
+    MachineInstr &MI = *I;
+    if (MI == InsertI)
       break;
-    LR.stepBackward(*I);
+    LiveRegs.stepBackward(*I);
   }
   InsertI++;
-  if (LR.contains(AArch64::NZCV))
+  if (LiveRegs.contains(AArch64::NZCV))
     return InsertI;
 
   llvm::stable_sort(Instrs,



More information about the llvm-commits mailing list