[llvm] [SPARC] Prevent RESTORE from sourcing from %o7 in call delay slots (PR #172593)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 17 03:21:13 PST 2025
https://github.com/koachan updated https://github.com/llvm/llvm-project/pull/172593
>From 5b9d25d6e9cd6bf7c7e4ef27e687e9019e46ffbc Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Wed, 17 Dec 2025 08:48:44 +0700
Subject: [PATCH 1/3] [SPARC] Prevent RESTORE from sourcing from %o7 in call
delay slots
Combining an instruction that reads from %o7 with a RESTORE in a call delay
slot results in a RESTORE instruction that reads from %o7, which has already
been overwritten by the call instruction, so junk values are produced.
This should fix the issue with `test-suite::lencod.test`.
---
llvm/lib/Target/Sparc/DelaySlotFiller.cpp | 49 +-
llvm/lib/Target/Sparc/SparcInstrInfo.td | 4 +-
.../CodeGen/SPARC/2011-01-19-DelaySlot.ll | 563 ++++++++++++++++--
3 files changed, 547 insertions(+), 69 deletions(-)
diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 024030d196ee3..c8caf7dffad2a 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -390,10 +390,10 @@ bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
return true;
}
-static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI,
+static bool combineRestoreADD(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator RestoreMI,
MachineBasicBlock::iterator AddMI,
- const TargetInstrInfo *TII)
-{
+ const TargetInstrInfo *TII) {
// Before: add <op0>, <op1>, %i[0-7]
// restore %g0, %g0, %i[0-7]
//
@@ -403,6 +403,21 @@ static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI,
if (reg < SP::I0 || reg > SP::I7)
return false;
+ // Check whether it uses %o7 as its source and the corresponding branch
+ // instruction is a call.
+ MachineBasicBlock::iterator LastInst = MBB.getFirstTerminator();
+ bool IsCall = LastInst != MBB.end() && LastInst->isCall();
+
+ // Check whether it uses %o7 as its source.
+ if (IsCall && AddMI->getOpcode() == SP::ADDrr &&
+ (AddMI->getOperand(1).getReg() == SP::O7 ||
+ AddMI->getOperand(2).getReg() == SP::O7))
+ return false;
+
+ if (IsCall && AddMI->getOpcode() == SP::ADDri &&
+ AddMI->getOperand(1).getReg() == SP::O7)
+ return false;
+
// Erase RESTORE.
RestoreMI->eraseFromParent();
@@ -417,10 +432,10 @@ static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI,
return true;
}
-static bool combineRestoreOR(MachineBasicBlock::iterator RestoreMI,
+static bool combineRestoreOR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator RestoreMI,
MachineBasicBlock::iterator OrMI,
- const TargetInstrInfo *TII)
-{
+ const TargetInstrInfo *TII) {
// Before: or <op0>, <op1>, %i[0-7]
// restore %g0, %g0, %i[0-7]
// and <op0> or <op1> is zero,
@@ -442,6 +457,20 @@ static bool combineRestoreOR(MachineBasicBlock::iterator RestoreMI,
&& (!OrMI->getOperand(2).isImm() || OrMI->getOperand(2).getImm() != 0))
return false;
+ // Check whether it uses %o7 as its source and the corresponding branch
+ // instruction is a call.
+ MachineBasicBlock::iterator LastInst = MBB.getFirstTerminator();
+ bool IsCall = LastInst != MBB.end() && LastInst->isCall();
+
+ if (IsCall && OrMI->getOpcode() == SP::ORrr &&
+ (OrMI->getOperand(1).getReg() == SP::O7 ||
+ OrMI->getOperand(2).getReg() == SP::O7))
+ return false;
+
+ if (IsCall && OrMI->getOpcode() == SP::ORrr &&
+ OrMI->getOperand(1).getReg() == SP::O7)
+ return false;
+
// Erase RESTORE.
RestoreMI->eraseFromParent();
@@ -520,9 +549,13 @@ bool Filler::tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB,
switch (PrevInst->getOpcode()) {
default: break;
case SP::ADDrr:
- case SP::ADDri: return combineRestoreADD(MBBI, PrevInst, TII); break;
+ case SP::ADDri:
+ return combineRestoreADD(MBB, MBBI, PrevInst, TII);
+ break;
case SP::ORrr:
- case SP::ORri: return combineRestoreOR(MBBI, PrevInst, TII); break;
+ case SP::ORri:
+ return combineRestoreOR(MBB, MBBI, PrevInst, TII);
+ break;
case SP::SETHIi: return combineRestoreSETHIi(MBBI, PrevInst, TII); break;
}
// It cannot combine with the previous instruction.
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 107817fcab6df..c08599402daa8 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -1589,7 +1589,7 @@ let Uses = [O6], isCall = 1, hasDelaySlot = 1 in
// Instructions for tail calls.
//===----------------------------------------------------------------------===//
let isCodeGenOnly = 1, isReturn = 1, hasDelaySlot = 1,
- isTerminator = 1, isBarrier = 1 in {
+ isTerminator = 1, isBarrier = 1, isCall = 1 in {
def TAIL_CALL : InstSP<(outs), (ins calltarget:$disp, variable_ops),
"call $disp",
[(tailcall tglobaladdr:$disp)]> {
@@ -1603,7 +1603,7 @@ def : Pat<(tailcall (iPTR texternalsym:$dst)),
(TAIL_CALL texternalsym:$dst)>;
let isCodeGenOnly = 1, isReturn = 1, hasDelaySlot = 1, isTerminator = 1,
- isBarrier = 1, rd = 0 in {
+ isBarrier = 1, isCall = 1, rd = 0 in {
def TAIL_CALLri : F3_2<2, 0b111000,
(outs), (ins (MEMri $rs1, $simm13):$addr, variable_ops),
"jmp $addr",
diff --git a/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
index 767ef7eb510e6..518020bc9ca3a 100644
--- a/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
+++ b/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
@@ -1,32 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
;RUN: llc -mtriple=sparc < %s -verify-machineinstrs | FileCheck %s
;RUN: llc -mtriple=sparc -O0 < %s -verify-machineinstrs | FileCheck %s -check-prefix=UNOPT
target triple = "sparc-unknown-linux-gnu"
define i32 @test(i32 %a) #0 {
+; CHECK-LABEL: test:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: call bar
+; CHECK-NEXT: mov %i0, %o0
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %g0, %o0, %o0
+;
+; UNOPT-LABEL: test:
+; UNOPT: ! %bb.0: ! %entry
+; UNOPT-NEXT: save %sp, -96, %sp
+; UNOPT-NEXT: call bar
+; UNOPT-NEXT: mov %i0, %o0
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore %g0, %o0, %o0
entry:
-; CHECK: test
-; CHECK: call bar
-; CHECK-NOT: nop
-; CHECK: ret
-; CHECK-NEXT: restore
%0 = tail call i32 @bar(i32 %a) nounwind
ret i32 %0
}
define i32 @test_jmpl(ptr nocapture %f, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: test_jmpl:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: mov %i2, %o1
+; CHECK-NEXT: call %i0
+; CHECK-NEXT: mov %i1, %o0
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %g0, %o0, %o0
+;
+; UNOPT-LABEL: test_jmpl:
+; UNOPT: ! %bb.0: ! %entry
+; UNOPT-NEXT: save %sp, -96, %sp
+; UNOPT-NEXT: mov %i2, %o1
+; UNOPT-NEXT: call %i0
+; UNOPT-NEXT: mov %i1, %o0
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore %g0, %o0, %o0
entry:
-; CHECK: test_jmpl
-; CHECK: call
-; CHECK-NOT: nop
-; CHECK: ret
-; CHECK-NEXT: restore
%0 = tail call i32 %f(i32 %a, i32 %b) nounwind
ret i32 %0
}
define i32 @test_loop(i32 %a, i32 %b) nounwind readnone {
-; CHECK: test_loop
+; CHECK-LABEL: test_loop:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: cmp %o1, 1
+; CHECK-NEXT: bl .LBB2_3
+; CHECK-NEXT: nop
+; CHECK-NEXT: ! %bb.1: ! %bb.preheader
+; CHECK-NEXT: mov %g0, %o2
+; CHECK-NEXT: mov %g0, %o3
+; CHECK-NEXT: mov 1, %o4
+; CHECK-NEXT: .LBB2_2: ! %bb
+; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: andn %o4, %o3, %o5
+; CHECK-NEXT: sll %o2, %o5, %o5
+; CHECK-NEXT: add %o5, %o0, %o0
+; CHECK-NEXT: add %o3, 1, %o3
+; CHECK-NEXT: cmp %o1, %o3
+; CHECK-NEXT: bne .LBB2_2
+; CHECK-NEXT: add %o2, %o1, %o2
+; CHECK-NEXT: .LBB2_3: ! %bb5
+; CHECK-NEXT: retl
+; CHECK-NEXT: nop
+;
+; UNOPT-LABEL: test_loop:
+; UNOPT: ! %bb.0: ! %entry
+; UNOPT-NEXT: add %sp, -104, %sp
+; UNOPT-NEXT: mov %o1, %o2
+; UNOPT-NEXT: st %o2, [%sp+92] ! 4-byte Folded Spill
+; UNOPT-NEXT: mov %o0, %o1
+; UNOPT-NEXT: mov %g0, %o0
+; UNOPT-NEXT: cmp %o2, 1
+; UNOPT-NEXT: st %o1, [%sp+96] ! 4-byte Folded Spill
+; UNOPT-NEXT: bl .LBB2_2
+; UNOPT-NEXT: st %o0, [%sp+100]
+; UNOPT-NEXT: ba .LBB2_1
+; UNOPT-NEXT: nop
+; UNOPT-NEXT: .LBB2_1: ! %bb
+; UNOPT-NEXT: ! =>This Inner Loop Header: Depth=1
+; UNOPT-NEXT: ld [%sp+100], %o0 ! 4-byte Folded Reload
+; UNOPT-NEXT: ld [%sp+96], %o3 ! 4-byte Folded Reload
+; UNOPT-NEXT: ld [%sp+92], %o2 ! 4-byte Folded Reload
+; UNOPT-NEXT: smul %o0, %o2, %o1
+; UNOPT-NEXT: mov 1, %o4
+; UNOPT-NEXT: andn %o4, %o0, %o4
+; UNOPT-NEXT: sll %o1, %o4, %o1
+; UNOPT-NEXT: add %o1, %o3, %o1
+; UNOPT-NEXT: add %o0, 1, %o0
+; UNOPT-NEXT: cmp %o0, %o2
+; UNOPT-NEXT: st %o1, [%sp+96] ! 4-byte Folded Spill
+; UNOPT-NEXT: bne .LBB2_1
+; UNOPT-NEXT: st %o0, [%sp+100]
+; UNOPT-NEXT: ba .LBB2_2
+; UNOPT-NEXT: nop
+; UNOPT-NEXT: .LBB2_2: ! %bb5
+; UNOPT-NEXT: ld [%sp+96], %o0 ! 4-byte Folded Reload
+; UNOPT-NEXT: retl
+; UNOPT-NEXT: add %sp, 104, %sp
entry:
%0 = icmp sgt i32 %b, 0
br i1 %0, label %bb, label %bb5
@@ -41,26 +119,59 @@ bb: ; preds = %entry, %bb
%a_addr.0 = add i32 %.pn, %a_addr.18
%3 = add nsw i32 %1, 1
%exitcond = icmp eq i32 %3, %b
-;CHECK: cmp
-;CHECK: bne
-;CHECK-NOT: nop
br i1 %exitcond, label %bb5, label %bb
bb5: ; preds = %bb, %entry
%a_addr.1.lcssa = phi i32 [ %a, %entry ], [ %a_addr.0, %bb ]
-;CHECK: retl
-;CHECK-NOT: restore
ret i32 %a_addr.1.lcssa
}
define i32 @test_inlineasm(i32 %a) #0 {
+; CHECK-LABEL: test_inlineasm:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: mov %i0, %o0
+; CHECK-NEXT: cmp %i0, -1
+; CHECK-NEXT: !APP
+; CHECK-NEXT: sethi 0, %g0
+; CHECK-NEXT: !NO_APP
+; CHECK-NEXT: ble .LBB3_2
+; CHECK-NEXT: nop
+; CHECK-NEXT: ! %bb.1: ! %bb1
+; CHECK-NEXT: call bar
+; CHECK-NEXT: nop
+; CHECK-NEXT: ba .LBB3_3
+; CHECK-NEXT: nop
+; CHECK-NEXT: .LBB3_2: ! %bb
+; CHECK-NEXT: call foo
+; CHECK-NEXT: nop
+; CHECK-NEXT: .LBB3_3: ! %bb
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %g0, %o0, %o0
+;
+; UNOPT-LABEL: test_inlineasm:
+; UNOPT: ! %bb.0: ! %entry
+; UNOPT-NEXT: save %sp, -96, %sp
+; UNOPT-NEXT: st %i0, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT: !APP
+; UNOPT-NEXT: sethi 0, %g0
+; UNOPT-NEXT: !NO_APP
+; UNOPT-NEXT: cmp %i0, -1
+; UNOPT-NEXT: bg .LBB3_2
+; UNOPT-NEXT: nop
+; UNOPT-NEXT: ba .LBB3_1
+; UNOPT-NEXT: nop
+; UNOPT-NEXT: .LBB3_1: ! %bb
+; UNOPT-NEXT: call foo
+; UNOPT-NEXT: ld [%fp+-4], %o0
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore %g0, %o0, %o0
+; UNOPT-NEXT: .LBB3_2: ! %bb1
+; UNOPT-NEXT: call bar
+; UNOPT-NEXT: ld [%fp+-4], %o0
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore %g0, %o0, %o0
entry:
-;CHECK-LABEL: test_inlineasm:
-;CHECK: cmp
-;CHECK: sethi
-;CHECK: !NO_APP
-;CHECK-NEXT: ble
-;CHECK-NEXT: nop
tail call void asm sideeffect "sethi 0, %g0", ""() nounwind
%0 = icmp slt i32 %a, 0
br i1 %0, label %bb, label %bb1
@@ -80,22 +191,57 @@ declare i32 @bar(i32)
define i32 @test_implicit_def() #0 {
+; CHECK-LABEL: test_implicit_def:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: call func
+; CHECK-NEXT: nop
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %g0, %g0, %o0
+;
+; UNOPT-LABEL: test_implicit_def:
+; UNOPT: ! %bb.0: ! %entry
+; UNOPT-NEXT: save %sp, -96, %sp
+; UNOPT-NEXT: ! implicit-def: $o0
+; UNOPT-NEXT: call func
+; UNOPT-NEXT: nop
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore %g0, %g0, %o0
entry:
-;UNOPT-LABEL: test_implicit_def:
-;UNOPT: call func
-;UNOPT-NEXT: nop
%0 = tail call i32 @func(ptr undef) nounwind
ret i32 0
}
define i32 @prevent_o7_in_call_delay_slot(i32 %i0) #0 {
+; CHECK-LABEL: prevent_o7_in_call_delay_slot:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: add %i0, 2, %o5
+; CHECK-NEXT: add %i0, 3, %o7
+; CHECK-NEXT: !APP
+; CHECK-NEXT: !NO_APP
+; CHECK-NEXT: add %o5, %o7, %o0
+; CHECK-NEXT: call bar
+; CHECK-NEXT: nop
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %g0, %o0, %o0
+;
+; UNOPT-LABEL: prevent_o7_in_call_delay_slot:
+; UNOPT: ! %bb.0: ! %entry
+; UNOPT-NEXT: save %sp, -104, %sp
+; UNOPT-NEXT: add %i0, 2, %o5
+; UNOPT-NEXT: st %o5, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT: add %i0, 3, %o7
+; UNOPT-NEXT: st %o7, [%fp+-8] ! 4-byte Folded Spill
+; UNOPT-NEXT: !APP
+; UNOPT-NEXT: !NO_APP
+; UNOPT-NEXT: ld [%fp+-8], %i1 ! 4-byte Folded Reload
+; UNOPT-NEXT: ld [%fp+-4], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT: call bar
+; UNOPT-NEXT: add %i0, %i1, %o0
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore %g0, %o0, %o0
entry:
-;CHECK-LABEL: prevent_o7_in_call_delay_slot:
-;CHECK: add %i0, 2, %o5
-;CHECK: add %i0, 3, %o7
-;CHECK: add %o5, %o7, %o0
-;CHECK: call bar
-;CHECK-NEXT: nop
%0 = add nsw i32 %i0, 2
%1 = add nsw i32 %i0, 3
tail call void asm sideeffect "", "r,r,~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o6},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7}"(i32 %0, i32 %1)
@@ -104,46 +250,187 @@ entry:
ret i32 %3
}
+define i32 @prevent_o7_in_restore_add_in_call_delay_slot(i32 %i0) #0 {
+; CHECK-LABEL: prevent_o7_in_restore_add_in_call_delay_slot:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: add %i0, 2, %o5
+; CHECK-NEXT: add %i0, 3, %o7
+; CHECK-NEXT: !APP
+; CHECK-NEXT: !NO_APP
+; CHECK-NEXT: add %o5, %o7, %i0
+; CHECK-NEXT: call bar
+; CHECK-NEXT: mov %i0, %o0
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore
+;
+; UNOPT-LABEL: prevent_o7_in_restore_add_in_call_delay_slot:
+; UNOPT: ! %bb.0: ! %entry
+; UNOPT-NEXT: save %sp, -104, %sp
+; UNOPT-NEXT: add %i0, 2, %o5
+; UNOPT-NEXT: st %o5, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT: add %i0, 3, %o7
+; UNOPT-NEXT: st %o7, [%fp+-8] ! 4-byte Folded Spill
+; UNOPT-NEXT: !APP
+; UNOPT-NEXT: !NO_APP
+; UNOPT-NEXT: ld [%fp+-8], %i1 ! 4-byte Folded Reload
+; UNOPT-NEXT: ld [%fp+-4], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT: add %i0, %i1, %i0
+; UNOPT-NEXT: call bar
+; UNOPT-NEXT: mov %i0, %o0
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore
+entry:
+ %0 = add nsw i32 %i0, 2
+ %1 = add nsw i32 %i0, 3
+ tail call void asm sideeffect "", "r,r,~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o6},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7}"(i32 %0, i32 %1)
+ %2 = add nsw i32 %0, %1
+ %3 = tail call i32 @bar(i32 %2)
+ ret i32 %2
+}
+
+define i32 @prevent_o7_in_restore_or_in_call_delay_slot(i32 %i0) #0 {
+; CHECK-LABEL: prevent_o7_in_restore_or_in_call_delay_slot:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: add %i0, 2, %o7
+; CHECK-NEXT: !APP
+; CHECK-NEXT: !NO_APP
+; CHECK-NEXT: mov %o7, %o0
+; CHECK-NEXT: call bar
+; CHECK-NEXT: nop
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %g0, %o0, %o0
+;
+; UNOPT-LABEL: prevent_o7_in_restore_or_in_call_delay_slot:
+; UNOPT: ! %bb.0: ! %entry
+; UNOPT-NEXT: save %sp, -96, %sp
+; UNOPT-NEXT: add %i0, 2, %o7
+; UNOPT-NEXT: st %o7, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT: !APP
+; UNOPT-NEXT: !NO_APP
+; UNOPT-NEXT: call bar
+; UNOPT-NEXT: ld [%fp+-4], %o0
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore %g0, %o0, %o0
+entry:
+ %0 = add nsw i32 %i0, 2
+ tail call void asm sideeffect "", "r,~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7}"(i32 %0)
+ %1 = tail call i32 @bar(i32 %0)
+ ret i32 %1
+}
declare i32 @func(ptr)
define i32 @restore_add(i32 %a, i32 %b) {
+; CHECK-LABEL: restore_add:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: .cfi_def_cfa_register %fp
+; CHECK-NEXT: .cfi_window_save
+; CHECK-NEXT: .cfi_register %o7, %i7
+; CHECK-NEXT: call bar
+; CHECK-NEXT: mov %i0, %o0
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %o0, %i1, %o0
+;
+; UNOPT-LABEL: restore_add:
+; UNOPT: .cfi_startproc
+; UNOPT-NEXT: ! %bb.0: ! %entry
+; UNOPT-NEXT: save %sp, -96, %sp
+; UNOPT-NEXT: .cfi_def_cfa_register %fp
+; UNOPT-NEXT: .cfi_window_save
+; UNOPT-NEXT: .cfi_register %o7, %i7
+; UNOPT-NEXT: call bar
+; UNOPT-NEXT: mov %i0, %o0
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore %o0, %i1, %o0
entry:
-;CHECK-LABEL: restore_add:
-;CHECK: ret
-;CHECK: restore %o0, %i1, %o0
%0 = tail call i32 @bar(i32 %a) nounwind
%1 = add nsw i32 %0, %b
ret i32 %1
}
define i32 @restore_add_imm(i32 %a) {
+; CHECK-LABEL: restore_add_imm:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: .cfi_def_cfa_register %fp
+; CHECK-NEXT: .cfi_window_save
+; CHECK-NEXT: .cfi_register %o7, %i7
+; CHECK-NEXT: call bar
+; CHECK-NEXT: mov %i0, %o0
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %o0, 20, %o0
+;
+; UNOPT-LABEL: restore_add_imm:
+; UNOPT: .cfi_startproc
+; UNOPT-NEXT: ! %bb.0: ! %entry
+; UNOPT-NEXT: save %sp, -96, %sp
+; UNOPT-NEXT: .cfi_def_cfa_register %fp
+; UNOPT-NEXT: .cfi_window_save
+; UNOPT-NEXT: .cfi_register %o7, %i7
+; UNOPT-NEXT: call bar
+; UNOPT-NEXT: mov %i0, %o0
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore %o0, 20, %o0
entry:
-;CHECK-LABEL: restore_add_imm:
-;CHECK: ret
-;CHECK: restore %o0, 20, %o0
%0 = tail call i32 @bar(i32 %a) nounwind
%1 = add nsw i32 %0, 20
ret i32 %1
}
define i32 @restore_or(i32 %a) #0 {
+; CHECK-LABEL: restore_or:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: call bar
+; CHECK-NEXT: mov %i0, %o0
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %g0, %o0, %o0
+;
+; UNOPT-LABEL: restore_or:
+; UNOPT: ! %bb.0: ! %entry
+; UNOPT-NEXT: save %sp, -96, %sp
+; UNOPT-NEXT: call bar
+; UNOPT-NEXT: mov %i0, %o0
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore %g0, %o0, %o0
entry:
-;CHECK-LABEL: restore_or:
-;CHECK: ret
-;CHECK: restore %g0, %o0, %o0
%0 = tail call i32 @bar(i32 %a) nounwind
ret i32 %0
}
define i32 @restore_or_imm(i32 %a) {
+; CHECK-LABEL: restore_or_imm:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: .cfi_def_cfa_register %fp
+; CHECK-NEXT: .cfi_window_save
+; CHECK-NEXT: .cfi_register %o7, %i7
+; CHECK-NEXT: call bar
+; CHECK-NEXT: mov %i0, %o0
+; CHECK-NEXT: or %o0, 20, %i0
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore
+;
+; UNOPT-LABEL: restore_or_imm:
+; UNOPT: .cfi_startproc
+; UNOPT-NEXT: ! %bb.0: ! %entry
+; UNOPT-NEXT: save %sp, -96, %sp
+; UNOPT-NEXT: .cfi_def_cfa_register %fp
+; UNOPT-NEXT: .cfi_window_save
+; UNOPT-NEXT: .cfi_register %o7, %i7
+; UNOPT-NEXT: call bar
+; UNOPT-NEXT: mov %i0, %o0
+; UNOPT-NEXT: or %o0, 20, %i0
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore
entry:
-;CHECK-LABEL: restore_or_imm:
-;CHECK: or %o0, 20, %i0
-;CHECK: ret
-;CHECK-NOT: restore %g0, %g0, %g0
-;CHECK: restore
%0 = tail call i32 @bar(i32 %a) nounwind
%1 = or i32 %0, 20
ret i32 %1
@@ -151,10 +438,48 @@ entry:
define i32 @restore_sethi(i32 %a) {
+; CHECK-LABEL: restore_sethi:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: .cfi_def_cfa_register %fp
+; CHECK-NEXT: .cfi_window_save
+; CHECK-NEXT: .cfi_register %o7, %i7
+; CHECK-NEXT: call bar
+; CHECK-NEXT: mov %i0, %o0
+; CHECK-NEXT: cmp %o0, 0
+; CHECK-NEXT: bne .LBB12_2
+; CHECK-NEXT: nop
+; CHECK-NEXT: ! %bb.1: ! %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %g0, %g0, %o0
+; CHECK-NEXT: .LBB12_2:
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %g0, 3072, %o0
+;
+; UNOPT-LABEL: restore_sethi:
+; UNOPT: .cfi_startproc
+; UNOPT-NEXT: ! %bb.0: ! %entry
+; UNOPT-NEXT: save %sp, -104, %sp
+; UNOPT-NEXT: .cfi_def_cfa_register %fp
+; UNOPT-NEXT: .cfi_window_save
+; UNOPT-NEXT: .cfi_register %o7, %i7
+; UNOPT-NEXT: call bar
+; UNOPT-NEXT: mov %i0, %o0
+; UNOPT-NEXT: mov %g0, %i0
+; UNOPT-NEXT: st %i0, [%fp+-8] ! 4-byte Folded Spill
+; UNOPT-NEXT: sethi 3, %i0
+; UNOPT-NEXT: cmp %o0, 0
+; UNOPT-NEXT: bne .LBB12_2
+; UNOPT-NEXT: st %i0, [%fp+-4]
+; UNOPT-NEXT: ! %bb.1: ! %entry
+; UNOPT-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT: st %i0, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT: .LBB12_2: ! %entry
+; UNOPT-NEXT: ld [%fp+-4], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore
entry:
-;CHECK-LABEL: restore_sethi:
-;CHECK-NOT: sethi 3
-;CHECK: restore %g0, 3072, %o0
%0 = tail call i32 @bar(i32 %a) nounwind
%1 = icmp ne i32 %0, 0
%2 = select i1 %1, i32 3072, i32 0
@@ -162,10 +487,49 @@ entry:
}
define i32 @restore_sethi_3bit(i32 %a) {
+; CHECK-LABEL: restore_sethi_3bit:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: .cfi_def_cfa_register %fp
+; CHECK-NEXT: .cfi_window_save
+; CHECK-NEXT: .cfi_register %o7, %i7
+; CHECK-NEXT: call bar
+; CHECK-NEXT: mov %i0, %o0
+; CHECK-NEXT: cmp %o0, 0
+; CHECK-NEXT: bne .LBB13_2
+; CHECK-NEXT: nop
+; CHECK-NEXT: ! %bb.1: ! %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %g0, %g0, %o0
+; CHECK-NEXT: .LBB13_2:
+; CHECK-NEXT: sethi 6, %i0
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore
+;
+; UNOPT-LABEL: restore_sethi_3bit:
+; UNOPT: .cfi_startproc
+; UNOPT-NEXT: ! %bb.0: ! %entry
+; UNOPT-NEXT: save %sp, -104, %sp
+; UNOPT-NEXT: .cfi_def_cfa_register %fp
+; UNOPT-NEXT: .cfi_window_save
+; UNOPT-NEXT: .cfi_register %o7, %i7
+; UNOPT-NEXT: call bar
+; UNOPT-NEXT: mov %i0, %o0
+; UNOPT-NEXT: mov %g0, %i0
+; UNOPT-NEXT: st %i0, [%fp+-8] ! 4-byte Folded Spill
+; UNOPT-NEXT: sethi 6, %i0
+; UNOPT-NEXT: cmp %o0, 0
+; UNOPT-NEXT: bne .LBB13_2
+; UNOPT-NEXT: st %i0, [%fp+-4]
+; UNOPT-NEXT: ! %bb.1: ! %entry
+; UNOPT-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT: st %i0, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT: .LBB13_2: ! %entry
+; UNOPT-NEXT: ld [%fp+-4], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore
entry:
-;CHECK-LABEL: restore_sethi_3bit:
-;CHECK: sethi 6
-;CHECK-NOT: restore %g0, 6144, %o0
%0 = tail call i32 @bar(i32 %a) nounwind
%1 = icmp ne i32 %0, 0
%2 = select i1 %1, i32 6144, i32 0
@@ -173,11 +537,49 @@ entry:
}
define i32 @restore_sethi_large(i32 %a) {
+; CHECK-LABEL: restore_sethi_large:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: .cfi_def_cfa_register %fp
+; CHECK-NEXT: .cfi_window_save
+; CHECK-NEXT: .cfi_register %o7, %i7
+; CHECK-NEXT: call bar
+; CHECK-NEXT: mov %i0, %o0
+; CHECK-NEXT: cmp %o0, 0
+; CHECK-NEXT: bne .LBB14_2
+; CHECK-NEXT: nop
+; CHECK-NEXT: ! %bb.1: ! %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %g0, %g0, %o0
+; CHECK-NEXT: .LBB14_2:
+; CHECK-NEXT: sethi 4000, %i0
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore
+;
+; UNOPT-LABEL: restore_sethi_large:
+; UNOPT: .cfi_startproc
+; UNOPT-NEXT: ! %bb.0: ! %entry
+; UNOPT-NEXT: save %sp, -104, %sp
+; UNOPT-NEXT: .cfi_def_cfa_register %fp
+; UNOPT-NEXT: .cfi_window_save
+; UNOPT-NEXT: .cfi_register %o7, %i7
+; UNOPT-NEXT: call bar
+; UNOPT-NEXT: mov %i0, %o0
+; UNOPT-NEXT: mov %g0, %i0
+; UNOPT-NEXT: st %i0, [%fp+-8] ! 4-byte Folded Spill
+; UNOPT-NEXT: sethi 4000, %i0
+; UNOPT-NEXT: cmp %o0, 0
+; UNOPT-NEXT: bne .LBB14_2
+; UNOPT-NEXT: st %i0, [%fp+-4]
+; UNOPT-NEXT: ! %bb.1: ! %entry
+; UNOPT-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT: st %i0, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT: .LBB14_2: ! %entry
+; UNOPT-NEXT: ld [%fp+-4], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore
entry:
-;CHECK-LABEL: restore_sethi_large:
-;CHECK: sethi 4000, %i0
-;CHECK-NOT: restore %g0, %g0, %g0
-;CHECK: restore
%0 = tail call i32 @bar(i32 %a) nounwind
%1 = icmp ne i32 %0, 0
%2 = select i1 %1, i32 4096000, i32 0
@@ -185,10 +587,53 @@ entry:
}
define i32 @test_generic_inst(i32 %arg) #0 {
-;CHECK-LABEL: test_generic_inst:
-;CHECK: ! fake_use: {{.*}}
-;CHECK: bne {{.*}}
-;CHECK-NEXT: nop
+; CHECK-LABEL: test_generic_inst:
+; CHECK: ! %bb.0:
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: call bar
+; CHECK-NEXT: mov %i0, %o0
+; CHECK-NEXT: andcc %o0, 1, %g0
+; CHECK-NEXT: ! fake_use: $i0
+; CHECK-NEXT: bne .LBB15_2
+; CHECK-NEXT: nop
+; CHECK-NEXT: ! %bb.1: ! %true
+; CHECK-NEXT: call bar
+; CHECK-NEXT: nop
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %g0, %o0, %o0
+; CHECK-NEXT: .LBB15_2: ! %false
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %o0, 1, %o0
+;
+; UNOPT-LABEL: test_generic_inst:
+; UNOPT: ! %bb.0:
+; UNOPT-NEXT: save %sp, -104, %sp
+; UNOPT-NEXT: mov %i0, %i1
+; UNOPT-NEXT: call bar
+; UNOPT-NEXT: mov %i1, %o0
+; UNOPT-NEXT: mov %o0, %i0
+; UNOPT-NEXT: st %i0, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT: and %o0, 1, %i0
+; UNOPT-NEXT: ! fake_use: $i1
+; UNOPT-NEXT: cmp %i0, 0
+; UNOPT-NEXT: bne .LBB15_2
+; UNOPT-NEXT: nop
+; UNOPT-NEXT: ba .LBB15_1
+; UNOPT-NEXT: nop
+; UNOPT-NEXT: .LBB15_1: ! %true
+; UNOPT-NEXT: call bar
+; UNOPT-NEXT: ld [%fp+-4], %o0
+; UNOPT-NEXT: ba .LBB15_3
+; UNOPT-NEXT: st %o0, [%fp+-8]
+; UNOPT-NEXT: .LBB15_2: ! %false
+; UNOPT-NEXT: ld [%fp+-4], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT: add %i0, 1, %i0
+; UNOPT-NEXT: ba .LBB15_3
+; UNOPT-NEXT: st %i0, [%fp+-8]
+; UNOPT-NEXT: .LBB15_3: ! %cont
+; UNOPT-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT: ret
+; UNOPT-NEXT: restore
%bar1 = call i32 @bar(i32 %arg)
%even = and i32 %bar1, 1
%cmp = icmp eq i32 %even, 0
>From ced81290d81c239c7d34437d936bd22a278bedb7 Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Wed, 17 Dec 2025 11:37:42 +0700
Subject: [PATCH 2/3] Update tail call handling
---
llvm/lib/Target/Sparc/DelaySlotFiller.cpp | 12 +++++++++---
llvm/lib/Target/Sparc/SparcInstrInfo.td | 4 ++--
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index c8caf7dffad2a..7549966ed7e92 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -11,6 +11,7 @@
// NOP is placed.
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/SparcMCTargetDesc.h"
#include "Sparc.h"
#include "SparcSubtarget.h"
#include "llvm/ADT/SmallSet.h"
@@ -406,9 +407,11 @@ static bool combineRestoreADD(MachineBasicBlock &MBB,
// Check whether it uses %o7 as its source and the corresponding branch
// instruction is a call.
MachineBasicBlock::iterator LastInst = MBB.getFirstTerminator();
- bool IsCall = LastInst != MBB.end() && LastInst->isCall();
+ unsigned CallOpc = LastInst->getOpcode();
+ bool IsCall = LastInst != MBB.end() &&
+ (LastInst->isCall() || CallOpc == SP::TAIL_CALL ||
+ CallOpc == SP::TAIL_CALLri);
- // Check whether it uses %o7 as its source.
if (IsCall && AddMI->getOpcode() == SP::ADDrr &&
(AddMI->getOperand(1).getReg() == SP::O7 ||
AddMI->getOperand(2).getReg() == SP::O7))
@@ -460,7 +463,10 @@ static bool combineRestoreOR(MachineBasicBlock &MBB,
// Check whether it uses %o7 as its source and the corresponding branch
// instruction is a call.
MachineBasicBlock::iterator LastInst = MBB.getFirstTerminator();
- bool IsCall = LastInst != MBB.end() && LastInst->isCall();
+ unsigned CallOpc = LastInst->getOpcode();
+ bool IsCall = LastInst != MBB.end() &&
+ (LastInst->isCall() || CallOpc == SP::TAIL_CALL ||
+ CallOpc == SP::TAIL_CALLri);
if (IsCall && OrMI->getOpcode() == SP::ORrr &&
(OrMI->getOperand(1).getReg() == SP::O7 ||
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index c08599402daa8..107817fcab6df 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -1589,7 +1589,7 @@ let Uses = [O6], isCall = 1, hasDelaySlot = 1 in
// Instructions for tail calls.
//===----------------------------------------------------------------------===//
let isCodeGenOnly = 1, isReturn = 1, hasDelaySlot = 1,
- isTerminator = 1, isBarrier = 1, isCall = 1 in {
+ isTerminator = 1, isBarrier = 1 in {
def TAIL_CALL : InstSP<(outs), (ins calltarget:$disp, variable_ops),
"call $disp",
[(tailcall tglobaladdr:$disp)]> {
@@ -1603,7 +1603,7 @@ def : Pat<(tailcall (iPTR texternalsym:$dst)),
(TAIL_CALL texternalsym:$dst)>;
let isCodeGenOnly = 1, isReturn = 1, hasDelaySlot = 1, isTerminator = 1,
- isBarrier = 1, isCall = 1, rd = 0 in {
+ isBarrier = 1, rd = 0 in {
def TAIL_CALLri : F3_2<2, 0b111000,
(outs), (ins (MEMri $rs1, $simm13):$addr, variable_ops),
"jmp $addr",
>From bb348dd82a795fae9e9781a2e7ae6dd60ab15966 Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Wed, 17 Dec 2025 18:20:21 +0700
Subject: [PATCH 3/3] Remove stray include
---
llvm/lib/Target/Sparc/DelaySlotFiller.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 7549966ed7e92..67a19a9fde7f7 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -11,7 +11,6 @@
// NOP is placed.
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/SparcMCTargetDesc.h"
#include "Sparc.h"
#include "SparcSubtarget.h"
#include "llvm/ADT/SmallSet.h"
More information about the llvm-commits
mailing list