[llvm] [SPARC] Prevent RESTORE from sourcing from %o7 in call delay slots (PR #172593)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 17 03:21:13 PST 2025


https://github.com/koachan updated https://github.com/llvm/llvm-project/pull/172593

>From 5b9d25d6e9cd6bf7c7e4ef27e687e9019e46ffbc Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Wed, 17 Dec 2025 08:48:44 +0700
Subject: [PATCH 1/3] [SPARC] Prevent RESTORE from sourcing from %o7 in call
 delay slots

Combining an instruction that reads from %o7 with a RESTORE in a call delay
slot results in a RESTORE instruction that reads from %o7 after it has been
overwritten by the call instruction, so junk values are produced.

This should fix the issue with `test-suite::lencod.test`.
---
 llvm/lib/Target/Sparc/DelaySlotFiller.cpp     |  49 +-
 llvm/lib/Target/Sparc/SparcInstrInfo.td       |   4 +-
 .../CodeGen/SPARC/2011-01-19-DelaySlot.ll     | 563 ++++++++++++++++--
 3 files changed, 547 insertions(+), 69 deletions(-)

diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 024030d196ee3..c8caf7dffad2a 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -390,10 +390,10 @@ bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
   return true;
 }
 
-static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI,
+static bool combineRestoreADD(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator RestoreMI,
                               MachineBasicBlock::iterator AddMI,
-                              const TargetInstrInfo *TII)
-{
+                              const TargetInstrInfo *TII) {
   // Before:  add  <op0>, <op1>, %i[0-7]
   //          restore %g0, %g0, %i[0-7]
   //
@@ -403,6 +403,21 @@ static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI,
   if (reg < SP::I0 || reg > SP::I7)
     return false;
 
+  // Check whether it uses %o7 as its source and the corresponding branch
+  // instruction is a call.
+  MachineBasicBlock::iterator LastInst = MBB.getFirstTerminator();
+  bool IsCall = LastInst != MBB.end() && LastInst->isCall();
+
+  // Check whether it uses %o7 as its source.
+  if (IsCall && AddMI->getOpcode() == SP::ADDrr &&
+      (AddMI->getOperand(1).getReg() == SP::O7 ||
+       AddMI->getOperand(2).getReg() == SP::O7))
+    return false;
+
+  if (IsCall && AddMI->getOpcode() == SP::ADDri &&
+      AddMI->getOperand(1).getReg() == SP::O7)
+    return false;
+
   // Erase RESTORE.
   RestoreMI->eraseFromParent();
 
@@ -417,10 +432,10 @@ static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI,
   return true;
 }
 
-static bool combineRestoreOR(MachineBasicBlock::iterator RestoreMI,
+static bool combineRestoreOR(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator RestoreMI,
                              MachineBasicBlock::iterator OrMI,
-                             const TargetInstrInfo *TII)
-{
+                             const TargetInstrInfo *TII) {
   // Before:  or  <op0>, <op1>, %i[0-7]
   //          restore %g0, %g0, %i[0-7]
   //    and <op0> or <op1> is zero,
@@ -442,6 +457,20 @@ static bool combineRestoreOR(MachineBasicBlock::iterator RestoreMI,
       && (!OrMI->getOperand(2).isImm() || OrMI->getOperand(2).getImm() != 0))
     return false;
 
+  // Check whether it uses %o7 as its source and the corresponding branch
+  // instruction is a call.
+  MachineBasicBlock::iterator LastInst = MBB.getFirstTerminator();
+  bool IsCall = LastInst != MBB.end() && LastInst->isCall();
+
+  if (IsCall && OrMI->getOpcode() == SP::ORrr &&
+      (OrMI->getOperand(1).getReg() == SP::O7 ||
+       OrMI->getOperand(2).getReg() == SP::O7))
+    return false;
+
+  if (IsCall && OrMI->getOpcode() == SP::ORrr &&
+      OrMI->getOperand(1).getReg() == SP::O7)
+    return false;
+
   // Erase RESTORE.
   RestoreMI->eraseFromParent();
 
@@ -520,9 +549,13 @@ bool Filler::tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB,
   switch (PrevInst->getOpcode()) {
   default: break;
   case SP::ADDrr:
-  case SP::ADDri: return combineRestoreADD(MBBI, PrevInst, TII); break;
+  case SP::ADDri:
+    return combineRestoreADD(MBB, MBBI, PrevInst, TII);
+    break;
   case SP::ORrr:
-  case SP::ORri:  return combineRestoreOR(MBBI, PrevInst, TII); break;
+  case SP::ORri:
+    return combineRestoreOR(MBB, MBBI, PrevInst, TII);
+    break;
   case SP::SETHIi: return combineRestoreSETHIi(MBBI, PrevInst, TII); break;
   }
   // It cannot combine with the previous instruction.
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 107817fcab6df..c08599402daa8 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -1589,7 +1589,7 @@ let Uses = [O6], isCall = 1, hasDelaySlot = 1 in
 // Instructions for tail calls.
 //===----------------------------------------------------------------------===//
 let isCodeGenOnly = 1, isReturn = 1,  hasDelaySlot = 1,
-    isTerminator = 1, isBarrier = 1 in {
+    isTerminator = 1, isBarrier = 1, isCall = 1 in {
   def TAIL_CALL : InstSP<(outs), (ins calltarget:$disp, variable_ops),
                          "call $disp",
                          [(tailcall tglobaladdr:$disp)]> {
@@ -1603,7 +1603,7 @@ def : Pat<(tailcall (iPTR texternalsym:$dst)),
           (TAIL_CALL texternalsym:$dst)>;
 
 let isCodeGenOnly = 1, isReturn = 1,  hasDelaySlot = 1,  isTerminator = 1,
-    isBarrier = 1, rd = 0 in {
+    isBarrier = 1, isCall = 1, rd = 0 in {
   def TAIL_CALLri : F3_2<2, 0b111000,
                          (outs), (ins (MEMri $rs1, $simm13):$addr, variable_ops),
                          "jmp $addr",
diff --git a/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
index 767ef7eb510e6..518020bc9ca3a 100644
--- a/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
+++ b/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
@@ -1,32 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ;RUN: llc -mtriple=sparc < %s -verify-machineinstrs | FileCheck %s
 ;RUN: llc -mtriple=sparc -O0 < %s -verify-machineinstrs | FileCheck %s -check-prefix=UNOPT
 
 target triple = "sparc-unknown-linux-gnu"
 
 define i32 @test(i32 %a) #0 {
+; CHECK-LABEL: test:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    call bar
+; CHECK-NEXT:    mov %i0, %o0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %g0, %o0, %o0
+;
+; UNOPT-LABEL: test:
+; UNOPT:       ! %bb.0: ! %entry
+; UNOPT-NEXT:    save %sp, -96, %sp
+; UNOPT-NEXT:    call bar
+; UNOPT-NEXT:    mov %i0, %o0
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore %g0, %o0, %o0
 entry:
-; CHECK: test
-; CHECK: call bar
-; CHECK-NOT: nop
-; CHECK: ret
-; CHECK-NEXT: restore
   %0 = tail call i32 @bar(i32 %a) nounwind
   ret i32 %0
 }
 
 define i32 @test_jmpl(ptr nocapture %f, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: test_jmpl:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    mov %i2, %o1
+; CHECK-NEXT:    call %i0
+; CHECK-NEXT:    mov %i1, %o0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %g0, %o0, %o0
+;
+; UNOPT-LABEL: test_jmpl:
+; UNOPT:       ! %bb.0: ! %entry
+; UNOPT-NEXT:    save %sp, -96, %sp
+; UNOPT-NEXT:    mov %i2, %o1
+; UNOPT-NEXT:    call %i0
+; UNOPT-NEXT:    mov %i1, %o0
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore %g0, %o0, %o0
 entry:
-; CHECK:      test_jmpl
-; CHECK:      call
-; CHECK-NOT:  nop
-; CHECK:      ret
-; CHECK-NEXT: restore
   %0 = tail call i32 %f(i32 %a, i32 %b) nounwind
   ret i32 %0
 }
 
 define i32 @test_loop(i32 %a, i32 %b) nounwind readnone {
-; CHECK: test_loop
+; CHECK-LABEL: test_loop:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    cmp %o1, 1
+; CHECK-NEXT:    bl .LBB2_3
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  ! %bb.1: ! %bb.preheader
+; CHECK-NEXT:    mov %g0, %o2
+; CHECK-NEXT:    mov %g0, %o3
+; CHECK-NEXT:    mov 1, %o4
+; CHECK-NEXT:  .LBB2_2: ! %bb
+; CHECK-NEXT:    ! =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    andn %o4, %o3, %o5
+; CHECK-NEXT:    sll %o2, %o5, %o5
+; CHECK-NEXT:    add %o5, %o0, %o0
+; CHECK-NEXT:    add %o3, 1, %o3
+; CHECK-NEXT:    cmp %o1, %o3
+; CHECK-NEXT:    bne .LBB2_2
+; CHECK-NEXT:    add %o2, %o1, %o2
+; CHECK-NEXT:  .LBB2_3: ! %bb5
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    nop
+;
+; UNOPT-LABEL: test_loop:
+; UNOPT:       ! %bb.0: ! %entry
+; UNOPT-NEXT:    add %sp, -104, %sp
+; UNOPT-NEXT:    mov %o1, %o2
+; UNOPT-NEXT:    st %o2, [%sp+92] ! 4-byte Folded Spill
+; UNOPT-NEXT:    mov %o0, %o1
+; UNOPT-NEXT:    mov %g0, %o0
+; UNOPT-NEXT:    cmp %o2, 1
+; UNOPT-NEXT:    st %o1, [%sp+96] ! 4-byte Folded Spill
+; UNOPT-NEXT:    bl .LBB2_2
+; UNOPT-NEXT:    st %o0, [%sp+100]
+; UNOPT-NEXT:    ba .LBB2_1
+; UNOPT-NEXT:    nop
+; UNOPT-NEXT:  .LBB2_1: ! %bb
+; UNOPT-NEXT:    ! =>This Inner Loop Header: Depth=1
+; UNOPT-NEXT:    ld [%sp+100], %o0 ! 4-byte Folded Reload
+; UNOPT-NEXT:    ld [%sp+96], %o3 ! 4-byte Folded Reload
+; UNOPT-NEXT:    ld [%sp+92], %o2 ! 4-byte Folded Reload
+; UNOPT-NEXT:    smul %o0, %o2, %o1
+; UNOPT-NEXT:    mov 1, %o4
+; UNOPT-NEXT:    andn %o4, %o0, %o4
+; UNOPT-NEXT:    sll %o1, %o4, %o1
+; UNOPT-NEXT:    add %o1, %o3, %o1
+; UNOPT-NEXT:    add %o0, 1, %o0
+; UNOPT-NEXT:    cmp %o0, %o2
+; UNOPT-NEXT:    st %o1, [%sp+96] ! 4-byte Folded Spill
+; UNOPT-NEXT:    bne .LBB2_1
+; UNOPT-NEXT:    st %o0, [%sp+100]
+; UNOPT-NEXT:    ba .LBB2_2
+; UNOPT-NEXT:    nop
+; UNOPT-NEXT:  .LBB2_2: ! %bb5
+; UNOPT-NEXT:    ld [%sp+96], %o0 ! 4-byte Folded Reload
+; UNOPT-NEXT:    retl
+; UNOPT-NEXT:    add %sp, 104, %sp
 entry:
   %0 = icmp sgt i32 %b, 0
   br i1 %0, label %bb, label %bb5
@@ -41,26 +119,59 @@ bb:                                               ; preds = %entry, %bb
   %a_addr.0 = add i32 %.pn, %a_addr.18
   %3 = add nsw i32 %1, 1
   %exitcond = icmp eq i32 %3, %b
-;CHECK:      cmp
-;CHECK:      bne
-;CHECK-NOT:  nop
   br i1 %exitcond, label %bb5, label %bb
 
 bb5:                                              ; preds = %bb, %entry
   %a_addr.1.lcssa = phi i32 [ %a, %entry ], [ %a_addr.0, %bb ]
-;CHECK:      retl
-;CHECK-NOT: restore
   ret i32 %a_addr.1.lcssa
 }
 
 define i32 @test_inlineasm(i32 %a) #0 {
+; CHECK-LABEL: test_inlineasm:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    mov %i0, %o0
+; CHECK-NEXT:    cmp %i0, -1
+; CHECK-NEXT:    !APP
+; CHECK-NEXT:    sethi 0, %g0
+; CHECK-NEXT:    !NO_APP
+; CHECK-NEXT:    ble .LBB3_2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  ! %bb.1: ! %bb1
+; CHECK-NEXT:    call bar
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    ba .LBB3_3
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  .LBB3_2: ! %bb
+; CHECK-NEXT:    call foo
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  .LBB3_3: ! %bb
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %g0, %o0, %o0
+;
+; UNOPT-LABEL: test_inlineasm:
+; UNOPT:       ! %bb.0: ! %entry
+; UNOPT-NEXT:    save %sp, -96, %sp
+; UNOPT-NEXT:    st %i0, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT:    !APP
+; UNOPT-NEXT:    sethi 0, %g0
+; UNOPT-NEXT:    !NO_APP
+; UNOPT-NEXT:    cmp %i0, -1
+; UNOPT-NEXT:    bg .LBB3_2
+; UNOPT-NEXT:    nop
+; UNOPT-NEXT:    ba .LBB3_1
+; UNOPT-NEXT:    nop
+; UNOPT-NEXT:  .LBB3_1: ! %bb
+; UNOPT-NEXT:    call foo
+; UNOPT-NEXT:    ld [%fp+-4], %o0
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore %g0, %o0, %o0
+; UNOPT-NEXT:  .LBB3_2: ! %bb1
+; UNOPT-NEXT:    call bar
+; UNOPT-NEXT:    ld [%fp+-4], %o0
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore %g0, %o0, %o0
 entry:
-;CHECK-LABEL:      test_inlineasm:
-;CHECK: cmp
-;CHECK:      sethi
-;CHECK:      !NO_APP
-;CHECK-NEXT: ble
-;CHECK-NEXT: nop
   tail call void asm sideeffect "sethi 0, %g0", ""() nounwind
   %0 = icmp slt i32 %a, 0
   br i1 %0, label %bb, label %bb1
@@ -80,22 +191,57 @@ declare i32 @bar(i32)
 
 
 define i32 @test_implicit_def() #0 {
+; CHECK-LABEL: test_implicit_def:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    call func
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %g0, %g0, %o0
+;
+; UNOPT-LABEL: test_implicit_def:
+; UNOPT:       ! %bb.0: ! %entry
+; UNOPT-NEXT:    save %sp, -96, %sp
+; UNOPT-NEXT:    ! implicit-def: $o0
+; UNOPT-NEXT:    call func
+; UNOPT-NEXT:    nop
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore %g0, %g0, %o0
 entry:
-;UNOPT-LABEL:       test_implicit_def:
-;UNOPT:       call func
-;UNOPT-NEXT:  nop
   %0 = tail call i32 @func(ptr undef) nounwind
   ret i32 0
 }
 
 define i32 @prevent_o7_in_call_delay_slot(i32 %i0) #0 {
+; CHECK-LABEL: prevent_o7_in_call_delay_slot:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    add %i0, 2, %o5
+; CHECK-NEXT:    add %i0, 3, %o7
+; CHECK-NEXT:    !APP
+; CHECK-NEXT:    !NO_APP
+; CHECK-NEXT:    add %o5, %o7, %o0
+; CHECK-NEXT:    call bar
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %g0, %o0, %o0
+;
+; UNOPT-LABEL: prevent_o7_in_call_delay_slot:
+; UNOPT:       ! %bb.0: ! %entry
+; UNOPT-NEXT:    save %sp, -104, %sp
+; UNOPT-NEXT:    add %i0, 2, %o5
+; UNOPT-NEXT:    st %o5, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT:    add %i0, 3, %o7
+; UNOPT-NEXT:    st %o7, [%fp+-8] ! 4-byte Folded Spill
+; UNOPT-NEXT:    !APP
+; UNOPT-NEXT:    !NO_APP
+; UNOPT-NEXT:    ld [%fp+-8], %i1 ! 4-byte Folded Reload
+; UNOPT-NEXT:    ld [%fp+-4], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT:    call bar
+; UNOPT-NEXT:    add %i0, %i1, %o0
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore %g0, %o0, %o0
 entry:
-;CHECK-LABEL:       prevent_o7_in_call_delay_slot:
-;CHECK:       add %i0, 2, %o5
-;CHECK:       add %i0, 3, %o7
-;CHECK:       add %o5, %o7, %o0
-;CHECK:       call bar
-;CHECK-NEXT:  nop
   %0 = add nsw i32 %i0, 2
   %1 = add nsw i32 %i0, 3
   tail call void asm sideeffect "", "r,r,~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o6},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7}"(i32 %0, i32 %1)
@@ -104,46 +250,187 @@ entry:
   ret i32 %3
 }
 
+define i32 @prevent_o7_in_restore_add_in_call_delay_slot(i32 %i0) #0 {
+; CHECK-LABEL: prevent_o7_in_restore_add_in_call_delay_slot:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    add %i0, 2, %o5
+; CHECK-NEXT:    add %i0, 3, %o7
+; CHECK-NEXT:    !APP
+; CHECK-NEXT:    !NO_APP
+; CHECK-NEXT:    add %o5, %o7, %i0
+; CHECK-NEXT:    call bar
+; CHECK-NEXT:    mov %i0, %o0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore
+;
+; UNOPT-LABEL: prevent_o7_in_restore_add_in_call_delay_slot:
+; UNOPT:       ! %bb.0: ! %entry
+; UNOPT-NEXT:    save %sp, -104, %sp
+; UNOPT-NEXT:    add %i0, 2, %o5
+; UNOPT-NEXT:    st %o5, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT:    add %i0, 3, %o7
+; UNOPT-NEXT:    st %o7, [%fp+-8] ! 4-byte Folded Spill
+; UNOPT-NEXT:    !APP
+; UNOPT-NEXT:    !NO_APP
+; UNOPT-NEXT:    ld [%fp+-8], %i1 ! 4-byte Folded Reload
+; UNOPT-NEXT:    ld [%fp+-4], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT:    add %i0, %i1, %i0
+; UNOPT-NEXT:    call bar
+; UNOPT-NEXT:    mov %i0, %o0
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore
+entry:
+  %0 = add nsw i32 %i0, 2
+  %1 = add nsw i32 %i0, 3
+  tail call void asm sideeffect "", "r,r,~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o6},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7}"(i32 %0, i32 %1)
+  %2 = add nsw i32 %0, %1
+  %3 = tail call i32 @bar(i32 %2)
+  ret i32 %2
+}
+
+define i32 @prevent_o7_in_restore_or_in_call_delay_slot(i32 %i0) #0 {
+; CHECK-LABEL: prevent_o7_in_restore_or_in_call_delay_slot:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    add %i0, 2, %o7
+; CHECK-NEXT:    !APP
+; CHECK-NEXT:    !NO_APP
+; CHECK-NEXT:    mov %o7, %o0
+; CHECK-NEXT:    call bar
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %g0, %o0, %o0
+;
+; UNOPT-LABEL: prevent_o7_in_restore_or_in_call_delay_slot:
+; UNOPT:       ! %bb.0: ! %entry
+; UNOPT-NEXT:    save %sp, -96, %sp
+; UNOPT-NEXT:    add %i0, 2, %o7
+; UNOPT-NEXT:    st %o7, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT:    !APP
+; UNOPT-NEXT:    !NO_APP
+; UNOPT-NEXT:    call bar
+; UNOPT-NEXT:    ld [%fp+-4], %o0
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore %g0, %o0, %o0
+entry:
+  %0 = add nsw i32 %i0, 2
+  tail call void asm sideeffect "", "r,~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7}"(i32 %0)
+  %1 = tail call i32 @bar(i32 %0)
+  ret i32 %1
+}
 
 declare i32 @func(ptr)
 
 
 define i32 @restore_add(i32 %a, i32 %b) {
+; CHECK-LABEL: restore_add:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    .cfi_def_cfa_register %fp
+; CHECK-NEXT:    .cfi_window_save
+; CHECK-NEXT:    .cfi_register %o7, %i7
+; CHECK-NEXT:    call bar
+; CHECK-NEXT:    mov %i0, %o0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %o0, %i1, %o0
+;
+; UNOPT-LABEL: restore_add:
+; UNOPT:         .cfi_startproc
+; UNOPT-NEXT:  ! %bb.0: ! %entry
+; UNOPT-NEXT:    save %sp, -96, %sp
+; UNOPT-NEXT:    .cfi_def_cfa_register %fp
+; UNOPT-NEXT:    .cfi_window_save
+; UNOPT-NEXT:    .cfi_register %o7, %i7
+; UNOPT-NEXT:    call bar
+; UNOPT-NEXT:    mov %i0, %o0
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore %o0, %i1, %o0
 entry:
-;CHECK-LABEL:  restore_add:
-;CHECK:  ret
-;CHECK:  restore %o0, %i1, %o0
   %0 = tail call i32 @bar(i32 %a) nounwind
   %1 = add nsw i32 %0, %b
   ret i32 %1
 }
 
 define i32 @restore_add_imm(i32 %a) {
+; CHECK-LABEL: restore_add_imm:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    .cfi_def_cfa_register %fp
+; CHECK-NEXT:    .cfi_window_save
+; CHECK-NEXT:    .cfi_register %o7, %i7
+; CHECK-NEXT:    call bar
+; CHECK-NEXT:    mov %i0, %o0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %o0, 20, %o0
+;
+; UNOPT-LABEL: restore_add_imm:
+; UNOPT:         .cfi_startproc
+; UNOPT-NEXT:  ! %bb.0: ! %entry
+; UNOPT-NEXT:    save %sp, -96, %sp
+; UNOPT-NEXT:    .cfi_def_cfa_register %fp
+; UNOPT-NEXT:    .cfi_window_save
+; UNOPT-NEXT:    .cfi_register %o7, %i7
+; UNOPT-NEXT:    call bar
+; UNOPT-NEXT:    mov %i0, %o0
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore %o0, 20, %o0
 entry:
-;CHECK-LABEL:  restore_add_imm:
-;CHECK:  ret
-;CHECK:  restore %o0, 20, %o0
   %0 = tail call i32 @bar(i32 %a) nounwind
   %1 = add nsw i32 %0, 20
   ret i32 %1
 }
 
 define i32 @restore_or(i32 %a) #0 {
+; CHECK-LABEL: restore_or:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    call bar
+; CHECK-NEXT:    mov %i0, %o0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %g0, %o0, %o0
+;
+; UNOPT-LABEL: restore_or:
+; UNOPT:       ! %bb.0: ! %entry
+; UNOPT-NEXT:    save %sp, -96, %sp
+; UNOPT-NEXT:    call bar
+; UNOPT-NEXT:    mov %i0, %o0
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore %g0, %o0, %o0
 entry:
-;CHECK-LABEL:  restore_or:
-;CHECK:  ret
-;CHECK:  restore %g0, %o0, %o0
   %0 = tail call i32 @bar(i32 %a) nounwind
   ret i32 %0
 }
 
 define i32 @restore_or_imm(i32 %a) {
+; CHECK-LABEL: restore_or_imm:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    .cfi_def_cfa_register %fp
+; CHECK-NEXT:    .cfi_window_save
+; CHECK-NEXT:    .cfi_register %o7, %i7
+; CHECK-NEXT:    call bar
+; CHECK-NEXT:    mov %i0, %o0
+; CHECK-NEXT:    or %o0, 20, %i0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore
+;
+; UNOPT-LABEL: restore_or_imm:
+; UNOPT:         .cfi_startproc
+; UNOPT-NEXT:  ! %bb.0: ! %entry
+; UNOPT-NEXT:    save %sp, -96, %sp
+; UNOPT-NEXT:    .cfi_def_cfa_register %fp
+; UNOPT-NEXT:    .cfi_window_save
+; UNOPT-NEXT:    .cfi_register %o7, %i7
+; UNOPT-NEXT:    call bar
+; UNOPT-NEXT:    mov %i0, %o0
+; UNOPT-NEXT:    or %o0, 20, %i0
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore
 entry:
-;CHECK-LABEL:  restore_or_imm:
-;CHECK:  or %o0, 20, %i0
-;CHECK:  ret
-;CHECK-NOT:  restore %g0, %g0, %g0
-;CHECK:  restore
   %0 = tail call i32 @bar(i32 %a) nounwind
   %1 = or i32 %0, 20
   ret i32 %1
@@ -151,10 +438,48 @@ entry:
 
 
 define i32 @restore_sethi(i32 %a) {
+; CHECK-LABEL: restore_sethi:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    .cfi_def_cfa_register %fp
+; CHECK-NEXT:    .cfi_window_save
+; CHECK-NEXT:    .cfi_register %o7, %i7
+; CHECK-NEXT:    call bar
+; CHECK-NEXT:    mov %i0, %o0
+; CHECK-NEXT:    cmp %o0, 0
+; CHECK-NEXT:    bne .LBB12_2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  ! %bb.1: ! %entry
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %g0, %g0, %o0
+; CHECK-NEXT:  .LBB12_2:
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %g0, 3072, %o0
+;
+; UNOPT-LABEL: restore_sethi:
+; UNOPT:         .cfi_startproc
+; UNOPT-NEXT:  ! %bb.0: ! %entry
+; UNOPT-NEXT:    save %sp, -104, %sp
+; UNOPT-NEXT:    .cfi_def_cfa_register %fp
+; UNOPT-NEXT:    .cfi_window_save
+; UNOPT-NEXT:    .cfi_register %o7, %i7
+; UNOPT-NEXT:    call bar
+; UNOPT-NEXT:    mov %i0, %o0
+; UNOPT-NEXT:    mov %g0, %i0
+; UNOPT-NEXT:    st %i0, [%fp+-8] ! 4-byte Folded Spill
+; UNOPT-NEXT:    sethi 3, %i0
+; UNOPT-NEXT:    cmp %o0, 0
+; UNOPT-NEXT:    bne .LBB12_2
+; UNOPT-NEXT:    st %i0, [%fp+-4]
+; UNOPT-NEXT:  ! %bb.1: ! %entry
+; UNOPT-NEXT:    ld [%fp+-8], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT:    st %i0, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT:  .LBB12_2: ! %entry
+; UNOPT-NEXT:    ld [%fp+-4], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore
 entry:
-;CHECK-LABEL: restore_sethi:
-;CHECK-NOT: sethi  3
-;CHECK: restore %g0, 3072, %o0
   %0 = tail call i32 @bar(i32 %a) nounwind
   %1 = icmp ne i32 %0, 0
   %2 = select i1 %1, i32 3072, i32 0
@@ -162,10 +487,49 @@ entry:
 }
 
 define i32 @restore_sethi_3bit(i32 %a) {
+; CHECK-LABEL: restore_sethi_3bit:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    .cfi_def_cfa_register %fp
+; CHECK-NEXT:    .cfi_window_save
+; CHECK-NEXT:    .cfi_register %o7, %i7
+; CHECK-NEXT:    call bar
+; CHECK-NEXT:    mov %i0, %o0
+; CHECK-NEXT:    cmp %o0, 0
+; CHECK-NEXT:    bne .LBB13_2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  ! %bb.1: ! %entry
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %g0, %g0, %o0
+; CHECK-NEXT:  .LBB13_2:
+; CHECK-NEXT:    sethi 6, %i0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore
+;
+; UNOPT-LABEL: restore_sethi_3bit:
+; UNOPT:         .cfi_startproc
+; UNOPT-NEXT:  ! %bb.0: ! %entry
+; UNOPT-NEXT:    save %sp, -104, %sp
+; UNOPT-NEXT:    .cfi_def_cfa_register %fp
+; UNOPT-NEXT:    .cfi_window_save
+; UNOPT-NEXT:    .cfi_register %o7, %i7
+; UNOPT-NEXT:    call bar
+; UNOPT-NEXT:    mov %i0, %o0
+; UNOPT-NEXT:    mov %g0, %i0
+; UNOPT-NEXT:    st %i0, [%fp+-8] ! 4-byte Folded Spill
+; UNOPT-NEXT:    sethi 6, %i0
+; UNOPT-NEXT:    cmp %o0, 0
+; UNOPT-NEXT:    bne .LBB13_2
+; UNOPT-NEXT:    st %i0, [%fp+-4]
+; UNOPT-NEXT:  ! %bb.1: ! %entry
+; UNOPT-NEXT:    ld [%fp+-8], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT:    st %i0, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT:  .LBB13_2: ! %entry
+; UNOPT-NEXT:    ld [%fp+-4], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore
 entry:
-;CHECK-LABEL: restore_sethi_3bit:
-;CHECK: sethi  6
-;CHECK-NOT: restore %g0, 6144, %o0
   %0 = tail call i32 @bar(i32 %a) nounwind
   %1 = icmp ne i32 %0, 0
   %2 = select i1 %1, i32 6144, i32 0
@@ -173,11 +537,49 @@ entry:
 }
 
 define i32 @restore_sethi_large(i32 %a) {
+; CHECK-LABEL: restore_sethi_large:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    .cfi_def_cfa_register %fp
+; CHECK-NEXT:    .cfi_window_save
+; CHECK-NEXT:    .cfi_register %o7, %i7
+; CHECK-NEXT:    call bar
+; CHECK-NEXT:    mov %i0, %o0
+; CHECK-NEXT:    cmp %o0, 0
+; CHECK-NEXT:    bne .LBB14_2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  ! %bb.1: ! %entry
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %g0, %g0, %o0
+; CHECK-NEXT:  .LBB14_2:
+; CHECK-NEXT:    sethi 4000, %i0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore
+;
+; UNOPT-LABEL: restore_sethi_large:
+; UNOPT:         .cfi_startproc
+; UNOPT-NEXT:  ! %bb.0: ! %entry
+; UNOPT-NEXT:    save %sp, -104, %sp
+; UNOPT-NEXT:    .cfi_def_cfa_register %fp
+; UNOPT-NEXT:    .cfi_window_save
+; UNOPT-NEXT:    .cfi_register %o7, %i7
+; UNOPT-NEXT:    call bar
+; UNOPT-NEXT:    mov %i0, %o0
+; UNOPT-NEXT:    mov %g0, %i0
+; UNOPT-NEXT:    st %i0, [%fp+-8] ! 4-byte Folded Spill
+; UNOPT-NEXT:    sethi 4000, %i0
+; UNOPT-NEXT:    cmp %o0, 0
+; UNOPT-NEXT:    bne .LBB14_2
+; UNOPT-NEXT:    st %i0, [%fp+-4]
+; UNOPT-NEXT:  ! %bb.1: ! %entry
+; UNOPT-NEXT:    ld [%fp+-8], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT:    st %i0, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT:  .LBB14_2: ! %entry
+; UNOPT-NEXT:    ld [%fp+-4], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore
 entry:
-;CHECK-LABEL: restore_sethi_large:
-;CHECK: sethi  4000, %i0
-;CHECK-NOT: restore %g0, %g0, %g0
-;CHECK:     restore
   %0 = tail call i32 @bar(i32 %a) nounwind
   %1 = icmp ne i32 %0, 0
   %2 = select i1 %1, i32 4096000, i32 0
@@ -185,10 +587,53 @@ entry:
 }
 
 define i32 @test_generic_inst(i32 %arg) #0 {
-;CHECK-LABEL: test_generic_inst:
-;CHECK: ! fake_use: {{.*}}
-;CHECK: bne {{.*}}
-;CHECK-NEXT: nop
+; CHECK-LABEL: test_generic_inst:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    call bar
+; CHECK-NEXT:    mov %i0, %o0
+; CHECK-NEXT:    andcc %o0, 1, %g0
+; CHECK-NEXT:    ! fake_use: $i0
+; CHECK-NEXT:    bne .LBB15_2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  ! %bb.1: ! %true
+; CHECK-NEXT:    call bar
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %g0, %o0, %o0
+; CHECK-NEXT:  .LBB15_2: ! %false
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %o0, 1, %o0
+;
+; UNOPT-LABEL: test_generic_inst:
+; UNOPT:       ! %bb.0:
+; UNOPT-NEXT:    save %sp, -104, %sp
+; UNOPT-NEXT:    mov %i0, %i1
+; UNOPT-NEXT:    call bar
+; UNOPT-NEXT:    mov %i1, %o0
+; UNOPT-NEXT:    mov %o0, %i0
+; UNOPT-NEXT:    st %i0, [%fp+-4] ! 4-byte Folded Spill
+; UNOPT-NEXT:    and %o0, 1, %i0
+; UNOPT-NEXT:    ! fake_use: $i1
+; UNOPT-NEXT:    cmp %i0, 0
+; UNOPT-NEXT:    bne .LBB15_2
+; UNOPT-NEXT:    nop
+; UNOPT-NEXT:    ba .LBB15_1
+; UNOPT-NEXT:    nop
+; UNOPT-NEXT:  .LBB15_1: ! %true
+; UNOPT-NEXT:    call bar
+; UNOPT-NEXT:    ld [%fp+-4], %o0
+; UNOPT-NEXT:    ba .LBB15_3
+; UNOPT-NEXT:    st %o0, [%fp+-8]
+; UNOPT-NEXT:  .LBB15_2: ! %false
+; UNOPT-NEXT:    ld [%fp+-4], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT:    add %i0, 1, %i0
+; UNOPT-NEXT:    ba .LBB15_3
+; UNOPT-NEXT:    st %i0, [%fp+-8]
+; UNOPT-NEXT:  .LBB15_3: ! %cont
+; UNOPT-NEXT:    ld [%fp+-8], %i0 ! 4-byte Folded Reload
+; UNOPT-NEXT:    ret
+; UNOPT-NEXT:    restore
   %bar1 = call i32 @bar(i32 %arg)
   %even = and i32 %bar1, 1
   %cmp = icmp eq i32 %even, 0

>From ced81290d81c239c7d34437d936bd22a278bedb7 Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Wed, 17 Dec 2025 11:37:42 +0700
Subject: [PATCH 2/3] Update tail call handling

---
 llvm/lib/Target/Sparc/DelaySlotFiller.cpp | 12 +++++++++---
 llvm/lib/Target/Sparc/SparcInstrInfo.td   |  4 ++--
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index c8caf7dffad2a..7549966ed7e92 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -11,6 +11,7 @@
 // NOP is placed.
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/SparcMCTargetDesc.h"
 #include "Sparc.h"
 #include "SparcSubtarget.h"
 #include "llvm/ADT/SmallSet.h"
@@ -406,9 +407,11 @@ static bool combineRestoreADD(MachineBasicBlock &MBB,
   // Check whether it uses %o7 as its source and the corresponding branch
   // instruction is a call.
   MachineBasicBlock::iterator LastInst = MBB.getFirstTerminator();
-  bool IsCall = LastInst != MBB.end() && LastInst->isCall();
+  unsigned CallOpc = LastInst->getOpcode();
+  bool IsCall = LastInst != MBB.end() &&
+                (LastInst->isCall() || CallOpc == SP::TAIL_CALL ||
+                 CallOpc == SP::TAIL_CALLri);
 
-  // Check whether it uses %o7 as its source.
   if (IsCall && AddMI->getOpcode() == SP::ADDrr &&
       (AddMI->getOperand(1).getReg() == SP::O7 ||
        AddMI->getOperand(2).getReg() == SP::O7))
@@ -460,7 +463,10 @@ static bool combineRestoreOR(MachineBasicBlock &MBB,
   // Check whether it uses %o7 as its source and the corresponding branch
   // instruction is a call.
   MachineBasicBlock::iterator LastInst = MBB.getFirstTerminator();
-  bool IsCall = LastInst != MBB.end() && LastInst->isCall();
+  unsigned CallOpc = LastInst->getOpcode();
+  bool IsCall = LastInst != MBB.end() &&
+                (LastInst->isCall() || CallOpc == SP::TAIL_CALL ||
+                 CallOpc == SP::TAIL_CALLri);
 
   if (IsCall && OrMI->getOpcode() == SP::ORrr &&
       (OrMI->getOperand(1).getReg() == SP::O7 ||
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index c08599402daa8..107817fcab6df 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -1589,7 +1589,7 @@ let Uses = [O6], isCall = 1, hasDelaySlot = 1 in
 // Instructions for tail calls.
 //===----------------------------------------------------------------------===//
 let isCodeGenOnly = 1, isReturn = 1,  hasDelaySlot = 1,
-    isTerminator = 1, isBarrier = 1, isCall = 1 in {
+    isTerminator = 1, isBarrier = 1 in {
   def TAIL_CALL : InstSP<(outs), (ins calltarget:$disp, variable_ops),
                          "call $disp",
                          [(tailcall tglobaladdr:$disp)]> {
@@ -1603,7 +1603,7 @@ def : Pat<(tailcall (iPTR texternalsym:$dst)),
           (TAIL_CALL texternalsym:$dst)>;
 
 let isCodeGenOnly = 1, isReturn = 1,  hasDelaySlot = 1,  isTerminator = 1,
-    isBarrier = 1, isCall = 1, rd = 0 in {
+    isBarrier = 1, rd = 0 in {
   def TAIL_CALLri : F3_2<2, 0b111000,
                          (outs), (ins (MEMri $rs1, $simm13):$addr, variable_ops),
                          "jmp $addr",

>From bb348dd82a795fae9e9781a2e7ae6dd60ab15966 Mon Sep 17 00:00:00 2001
From: Koakuma <koachan at protonmail.com>
Date: Wed, 17 Dec 2025 18:20:21 +0700
Subject: [PATCH 3/3] Remove stray include

---
 llvm/lib/Target/Sparc/DelaySlotFiller.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 7549966ed7e92..67a19a9fde7f7 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -11,7 +11,6 @@
 // NOP is placed.
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/SparcMCTargetDesc.h"
 #include "Sparc.h"
 #include "SparcSubtarget.h"
 #include "llvm/ADT/SmallSet.h"



More information about the llvm-commits mailing list