[Mlir-commits] [clang] [llvm] [mlir] Delegate __builtin_setjmp FP save to backend on windows CFI targets (PR #186843)
Pyry Kovanen
llvmlistbot at llvm.org
Wed Mar 18 06:30:00 PDT 2026
https://github.com/pkova updated https://github.com/llvm/llvm-project/pull/186843
>From c62583add00fb5e6a29661471ff77992fd3085e6 Mon Sep 17 00:00:00 2001
From: pkova <pyry at urbit.org>
Date: Wed, 18 Mar 2026 15:26:45 +0200
Subject: [PATCH] Add @llvm.setjmp intrinsic to store FP, IP and SP on the
backend
---
clang/lib/CodeGen/CGBuiltin.cpp | 26 +--
.../CodeGen/SystemZ/builtin-setjmp-logjmp.c | 2 +-
clang/test/Sema/builtin-longjmp.c | 2 +-
llvm/include/llvm/CodeGen/ISDOpcodes.h | 6 +
llvm/include/llvm/IR/Intrinsics.td | 2 +
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 2 +
.../SelectionDAG/SelectionDAGBuilder.cpp | 10 +
.../SelectionDAG/SelectionDAGDumper.cpp | 1 +
llvm/lib/Target/ARM/ARMISelLowering.cpp | 30 +++
llvm/lib/Target/ARM/ARMISelLowering.h | 3 +
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 158 +++++++++++++++
llvm/lib/Target/PowerPC/PPCISelLowering.h | 3 +
llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 7 +-
llvm/lib/Target/PowerPC/PPCInstrInfo.td | 11 +-
.../Target/SystemZ/SystemZISelLowering.cpp | 12 ++
llvm/lib/Target/SystemZ/SystemZISelLowering.h | 2 +
llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 4 +-
llvm/lib/Target/SystemZ/SystemZOperators.td | 2 +
llvm/lib/Target/VE/VEISelLowering.cpp | 145 ++++++++++++++
llvm/lib/Target/VE/VEISelLowering.h | 3 +
llvm/lib/Target/VE/VEInstrInfo.td | 7 +
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 1 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 180 +++++++++++++++++-
llvm/lib/Target/X86/X86ISelLowering.h | 3 +
llvm/lib/Target/X86/X86InstrCompiler.td | 8 +
llvm/lib/Target/X86/X86InstrFragments.td | 4 +
llvm/test/CodeGen/ARM/setjmp.ll | 47 +++++
llvm/test/CodeGen/PowerPC/setjmp.ll | 39 ++++
llvm/test/CodeGen/SystemZ/setjmp.ll | 35 ++++
llvm/test/CodeGen/VE/Scalar/setjmp.ll | 39 ++++
llvm/test/CodeGen/X86/setjmp.ll | 55 ++++++
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 76 ++++----
32 files changed, 858 insertions(+), 67 deletions(-)
create mode 100644 llvm/test/CodeGen/ARM/setjmp.ll
create mode 100644 llvm/test/CodeGen/PowerPC/setjmp.ll
create mode 100644 llvm/test/CodeGen/SystemZ/setjmp.ll
create mode 100644 llvm/test/CodeGen/VE/Scalar/setjmp.ll
create mode 100644 llvm/test/CodeGen/X86/setjmp.ll
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index df03e84ce9f81..34aedc111ee4c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4916,32 +4916,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
}
case Builtin::BI__builtin_setjmp: {
- // Buffer is a void**.
Address Buf = EmitPointerWithAlignment(E->getArg(0));
- if (getTarget().getTriple().getArch() == llvm::Triple::systemz) {
- // On this target, the back end fills in the context buffer completely.
- // It doesn't really matter if the frontend stores to the buffer before
- // calling setjmp, the back-end is going to overwrite them anyway.
- Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
- return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
- }
-
- // Store the frame pointer to the setjmp buffer.
- Value *FrameAddr = Builder.CreateCall(
- CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
- ConstantInt::get(Int32Ty, 0));
- Builder.CreateStore(FrameAddr, Buf);
-
- // Store the stack pointer to the setjmp buffer.
- Value *StackAddr = Builder.CreateStackSave();
- assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
-
- Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
- Builder.CreateStore(StackAddr, StackSaveSlot);
-
- // Call LLVM's EH setjmp, which is lightweight.
- Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
+ // The backend handles all buffer stores (FP, SP, IP) via @llvm.setjmp.
+ Function *F = CGM.getIntrinsic(Intrinsic::setjmp);
return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
}
case Builtin::BI__builtin_longjmp: {
diff --git a/clang/test/CodeGen/SystemZ/builtin-setjmp-logjmp.c b/clang/test/CodeGen/SystemZ/builtin-setjmp-logjmp.c
index 898891fa182ea..d2522c6bab7df 100644
--- a/clang/test/CodeGen/SystemZ/builtin-setjmp-logjmp.c
+++ b/clang/test/CodeGen/SystemZ/builtin-setjmp-logjmp.c
@@ -6,7 +6,7 @@ void *buf[20];
// CHECK-LABEL: define dso_local void @foo(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.eh.sjlj.setjmp(ptr @buf)
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.setjmp(ptr @buf)
// CHECK-NEXT: ret void
//
void foo()
diff --git a/clang/test/Sema/builtin-longjmp.c b/clang/test/Sema/builtin-longjmp.c
index 99463cf3385a1..b320ec0b6a2d2 100644
--- a/clang/test/Sema/builtin-longjmp.c
+++ b/clang/test/Sema/builtin-longjmp.c
@@ -21,7 +21,7 @@ jmp_buf buf;
// CHECK: call{{.*}} void @llvm.eh.sjlj.longjmp
// CHECK: define{{.*}} void @do_setjmp()
-// CHECK: call{{.*}} i32 @llvm.eh.sjlj.setjmp
+// CHECK: call{{.*}} i32 @llvm.setjmp
void do_jump(void) {
__builtin_longjmp(buf, 1); // expected-error {{__builtin_longjmp is not supported for the current target}}
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index fa578f733d4e8..2a49f30210b2c 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -161,6 +161,12 @@ enum NodeType {
/// and returns an outchain.
EH_SJLJ_SETJMP,
+ /// RESULT, OUTCHAIN = SETJMP(INCHAIN, buffer)
+ /// This corresponds to the setjmp intrinsic. Like EH_SJLJ_SETJMP but the
+ /// backend is responsible for storing all of FP, SP, and IP into the buffer
+ /// (the frontend does not emit any buffer stores).
+ SETJMP,
+
/// OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer)
/// This corresponds to the eh.sjlj.longjmp intrinsic.
/// It takes an input chain and a pointer to the jump buffer as inputs
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 4469ff155b854..5b309751fc517 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1571,6 +1571,8 @@ def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty], [IntrNoReturn]>;
def int_eh_sjlj_setup_dispatch : Intrinsic<[], []>;
+def int_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
+
//===---------------- Generic Variable Attribute Intrinsics----------------===//
//
def int_var_annotation : DefaultAttrsIntrinsic<
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5e54343f7f146..7b35825ba995c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1116,6 +1116,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::FRAME_TO_ARGS_OFFSET:
case ISD::EH_DWARF_CFA:
case ISD::EH_SJLJ_SETJMP:
+ case ISD::SETJMP:
case ISD::EH_SJLJ_LONGJMP:
case ISD::EH_SJLJ_SETUP_DISPATCH:
// These operations lie about being legal: when they claim to be legal,
@@ -3311,6 +3312,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Node->getOperand(0));
break;
case ISD::EH_SJLJ_SETJMP:
+ case ISD::SETJMP:
// If the target didn't expand this, just return 'zero' and preserve the
// chain.
Results.push_back(DAG.getConstant(0, dl, MVT::i32));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index eb55a68eaba84..8f5ed1c312f26 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6863,6 +6863,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
MFI.setFunctionContextIndex(FI);
return;
}
+ case Intrinsic::setjmp: {
+ SDValue Ops[2];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ SDValue Op = DAG.getNode(ISD::SETJMP, sdl,
+ DAG.getVTList(MVT::i32, MVT::Other), Ops);
+ setValue(&I, Op.getValue(0));
+ DAG.setRoot(Op.getValue(1));
+ return;
+ }
case Intrinsic::eh_sjlj_setjmp: {
SDValue Ops[2];
Ops[0] = getRoot();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 7161dd299f830..890545c756351 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -160,6 +160,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EH_DWARF_CFA: return "EH_DWARF_CFA";
case ISD::EH_RETURN: return "EH_RETURN";
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::SETJMP: return "SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH";
case ISD::ConstantPool: return "ConstantPool";
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 970c962197ac0..ef7dce063ab69 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1195,6 +1195,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
@@ -3776,6 +3777,34 @@ ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
Op.getOperand(1), Val);
}
+SDValue
+ARMTargetLowering::LowerSETJMP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Buf = Op.getOperand(1);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ // Store FP into buf[0].
+ const ARMBaseRegisterInfo &ARI =
+ *static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
+ Register FrameReg = ARI.getFrameRegister(MF);
+ SDValue FP = DAG.getCopyFromReg(Chain, dl, FrameReg, PtrVT);
+ Chain = DAG.getStore(FP.getValue(1), dl, FP, Buf, MachinePointerInfo());
+
+ // Store SP into buf[2] (offset 8).
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, ARM::SP, PtrVT);
+ SDValue SPAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Buf,
+ DAG.getConstant(8, dl, PtrVT));
+ Chain = DAG.getStore(SP.getValue(1), dl, SP, SPAddr, MachinePointerInfo());
+
+ // Delegate to EH_SJLJ_SETJMP for IP store + return value.
+ SDValue Val = DAG.getConstant(0, dl, MVT::i32);
+ return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
+ DAG.getVTList(MVT::i32, MVT::Other), Chain, Buf, Val);
+}
+
SDValue
ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -10435,6 +10464,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::SETJMP: return LowerSETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index e58d872c548e4..314d831a686b8 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -546,6 +546,7 @@ class VectorType;
SDValue Dst,
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG,
@@ -713,6 +714,8 @@ class VectorType;
void EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *MBB) const;
+
+
MachineBasicBlock *EmitStructByval(MachineInstr &MI,
MachineBasicBlock *MBB) const;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1515ff2e13b85..fcafe7a056122 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -601,6 +601,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// your own exception handling based on them.
// LLVM/Clang supports zero-cost DWARF exception handling.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// We want to legalize GlobalAddress and ConstantPool nodes into the
@@ -7985,6 +7986,14 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
Op.getOperand(0), Op.getOperand(1));
}
+SDValue PPCTargetLowering::lowerSETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(PPCISD::SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -12745,6 +12754,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
// Exception handling lowering.
case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::SETJMP: return lowerSETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
@@ -13577,6 +13587,151 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
return sinkMBB;
}
+MachineBasicBlock *
+PPCTargetLowering::emitSetJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI.getDebugLoc();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = ++MBB->getIterator();
+
+ Register DstReg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
+ Register mainDstReg = MRI.createVirtualRegister(RC);
+ Register restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MVT PVT = getPointerTy(MF->getDataLayout());
+ assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Buffer layout:
+ // buf[0] = Frame Pointer
+ // buf[1] = IP (return address / LR)
+ // buf[2] = Stack Pointer
+ // buf[3] = TOC pointer (R2, 64-bit ELF only)
+ // buf[4] = Base Pointer
+ const int64_t FPOffset = 0;
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
+
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ Register LabelReg = MRI.createVirtualRegister(PtrRC);
+ Register BufReg = MI.getOperand(1).getReg();
+
+ unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+
+ // Store FP to buf[0] if we have a frame pointer.
+ // Note: hasFP() is unreliable here because it depends on getStackSize()
+ // which isn't known yet during ISel. Use needsFP() instead.
+ auto *TFI = static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
+ if (TFI->needsFP(*MF)) {
+ unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
+ MIB = BuildMI(*thisMBB, MI, DL,
+ TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
+ .addReg(FP)
+ .addImm(FPOffset)
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
+ }
+
+ // Store SP to buf[2].
+ MIB = BuildMI(*thisMBB, MI, DL,
+ TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
+ .addReg(SP)
+ .addImm(SPOffset)
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
+
+ // Store TOC (R2) for 64-bit ELF.
+ if (Subtarget.is64BitELFABI()) {
+ setUsesTOCBasePtr(*MBB->getParent());
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
+ .addReg(PPC::X2)
+ .addImm(TOCOffset)
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
+ }
+
+ // Store BP.
+ unsigned BaseReg;
+ if (MF->getFunction().hasFnAttribute(Attribute::Naked))
+ BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
+ else
+ BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
+
+ MIB = BuildMI(*thisMBB, MI, DL,
+ TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
+ .addReg(BaseReg)
+ .addImm(BPOffset)
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
+
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
+ MIB.addRegMask(TRI->getNoPreservedMask());
+
+ BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
+
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
+ .addMBB(mainMBB);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
+ thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
+
+ // mainMBB:
+ // mainDstReg = 0
+ MIB =
+ BuildMI(mainMBB, DL,
+ TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+
+ // Store IP
+ if (Subtarget.isPPC64()) {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+ MIB.cloneMemRefs(MI);
+
+ BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(PPC::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(thisMBB);
+
+ MI.eraseFromParent();
+ return sinkMBB;
+}
+
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
@@ -13930,6 +14085,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
return emitEHSjLjSetJmp(MI, BB);
+ } else if (MI.getOpcode() == PPC::SetJmp32 ||
+ MI.getOpcode() == PPC::SetJmp64) {
+ return emitSetJmp(MI, BB);
} else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
return emitEHSjLjLongJmp(MI, BB);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index cfcc6b5f03edc..b084bed075b73 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -386,6 +386,8 @@ namespace llvm {
MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
+ MachineBasicBlock *emitSetJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
@@ -858,6 +860,7 @@ namespace llvm {
const CallBase *CB) const;
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 2b62654b08986..c4540c132c153 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -506,11 +506,16 @@ def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$RT), (ins),
// While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp
// is not.
let hasSideEffects = 1 in {
- let Defs = [CTR8] in
+ let Defs = [CTR8] in {
def EH_SjLj_SetJmp64 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP64",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
Requires<[IsPPC64]>;
+ def SetJmp64 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
+ "#SETJMP64",
+ [(set i32:$dst, (PPCsetjmp addr:$buf))]>,
+ Requires<[IsPPC64]>;
+ }
}
let hasSideEffects = 1, isBarrier = 1 in {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 3271e4d279f56..ba07e318f6e8e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -590,6 +590,10 @@ def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP",
SDTypeProfile<1, 1, [SDTCisInt<0>,
SDTCisPtrTy<1>]>,
[SDNPHasChain, SDNPSideEffect]>;
+def PPCsetjmp : SDNode<"PPCISD::SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
// EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
@@ -1895,11 +1899,16 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$LI),
// While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp
// is not.
let hasSideEffects = 1 in {
- let Defs = [CTR] in
+ let Defs = [CTR] in {
def EH_SjLj_SetJmp32 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP32",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
Requires<[IsPPC32]>;
+ def SetJmp32 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
+ "#SETJMP32",
+ [(set i32:$dst, (PPCsetjmp addr:$buf))]>,
+ Requires<[IsPPC32]>;
+ }
}
let hasSideEffects = 1, isBarrier = 1 in {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 84d66f88a812d..0a799d53c0619 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -808,6 +808,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// We're not using SJLJ for exception handling, but they're implemented
// solely to support use of __builtin_setjmp / __builtin_longjmp.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// We want to use MVC in preference to even a single load/store pair.
@@ -1181,6 +1182,14 @@ SystemZTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
return SinkMBB;
}
+MachineBasicBlock *
+SystemZTargetLowering::emitSetJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ // emitSetJmp simply forwards to emitEHSjLjSetJmp: the SystemZ
+ // implementation of emitEHSjLjSetJmp already stores FP, SP, and IP.
+ return emitEHSjLjSetJmp(MI, MBB);
+}
+
MachineBasicBlock *
SystemZTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
@@ -7298,6 +7307,7 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
case ISD::READCYCLECOUNTER:
return lowerREADCYCLECOUNTER(Op, DAG);
case ISD::EH_SJLJ_SETJMP:
+ case ISD::SETJMP:
case ISD::EH_SJLJ_LONGJMP:
// These operations are legal on our platform, but we cannot actually
// set the operation action to Legal as common code would treat this
@@ -11192,6 +11202,8 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
return emitProbedAlloca(MI, MBB);
case SystemZ::EH_SjLj_SetJmp:
return emitEHSjLjSetJmp(MI, MBB);
+ case SystemZ::SetJmp:
+ return emitSetJmp(MI, MBB);
case SystemZ::EH_SjLj_LongJmp:
return emitEHSjLjLongJmp(MI, MBB);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index bb3eeba6446d2..a6a7f5dfd5a8b 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -115,6 +115,8 @@ class SystemZTargetLowering : public TargetLowering {
}
MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
+ MachineBasicBlock *emitSetJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 35a923d070e3e..6755578236b46 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1920,8 +1920,10 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in {
//--------------------------------------------------------------------------
let isBarrier = 1, hasNoSchedulingInfo = 1 in {
let hasSideEffects = 1, usesCustomInserter = 1 in {
- def EH_SjLj_SetJmp : Pseudo<(outs GR32:$dst), (ins ADDR64:$R2),
+ def EH_SjLj_SetJmp : Pseudo<(outs GR32:$dst), (ins ADDR64:$R2),
[(set GR32:$dst, (z_eh_sjlj_setjmp ADDR64:$R2))]>;
+ def SetJmp : Pseudo<(outs GR32:$dst), (ins ADDR64:$R2),
+ [(set GR32:$dst, (z_setjmp ADDR64:$R2))]>;
let isTerminator = 1 in {
def EH_SjLj_LongJmp : Pseudo<(outs), (ins ADDR64:$R2),
[(z_eh_sjlj_longjmp ADDR64:$R2)]>;
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 2a5b0435c1565..36597fb5ecafd 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -414,6 +414,8 @@ def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest>;
def z_eh_sjlj_setjmp : SDNode<"ISD::EH_SJLJ_SETJMP", SDT_ZSetJmp,
[SDNPHasChain, SDNPSideEffect]>;
+def z_setjmp : SDNode<"ISD::SETJMP", SDT_ZSetJmp,
+ [SDNPHasChain, SDNPSideEffect]>;
def z_eh_sjlj_longjmp : SDNode<"ISD::EH_SJLJ_LONGJMP", SDT_ZLongJmp,
[SDNPHasChain, SDNPSideEffect]>;
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 141196c332074..5958d2cec5246 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -298,6 +298,7 @@ void VETargetLowering::initSPUActions() {
/// SJLJ instructions {
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
/// } SJLJ instructions
@@ -1663,6 +1664,12 @@ SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
Op.getOperand(1));
}
+SDValue VETargetLowering::lowerSETJMP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(VEISD::SETJMP, DL, DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -1833,6 +1840,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::EH_SJLJ_SETJMP:
return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::SETJMP:
+ return lowerSETJMP(Op, DAG);
case ISD::EH_SJLJ_SETUP_DISPATCH:
return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
case ISD::FRAMEADDR:
@@ -2246,6 +2255,140 @@ VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
return SinkMBB;
}
+MachineBasicBlock *
+VETargetLowering::emitSetJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = ++MBB->getIterator();
+
+ // Memory Reference.
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
+ Register BufReg = MI.getOperand(1).getReg();
+
+ Register DstReg;
+
+ DstReg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
+ (void)TRI;
+ Register MainDestReg = MRI.createVirtualRegister(RC);
+ Register RestoreDestReg = MRI.createVirtualRegister(RC);
+
+ // Buffer layout:
+ // buf[0] = Frame Pointer (SX9, offset 0)
+ // buf[1] = IP (offset 8)
+ // buf[2] = Stack Pointer (SX11, offset 16)
+ // buf[3] = Base Pointer (SX17, offset 24)
+
+ MachineBasicBlock *ThisMBB = MBB;
+ MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, MainMBB);
+ MF->insert(I, SinkMBB);
+ MF->push_back(RestoreMBB);
+ RestoreMBB->setMachineBlockAddressTaken();
+
+ // Transfer the remainder of BB and its successor edges to SinkMBB.
+ SinkMBB->splice(SinkMBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // ThisMBB:
+ Register LabelReg =
+ prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);
+
+ // Store FP (SX9) to buf[0].
+ const VEFrameLowering *TFI = Subtarget->getFrameLowering();
+ if (TFI->hasFP(*MF)) {
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
+ MIB.addReg(BufReg);
+ MIB.addImm(0);
+ MIB.addImm(0);
+ MIB.addReg(VE::SX9);
+ MIB.setMemRefs(MMOs);
+ }
+
+ // Store SP (SX11) to buf[2].
+ {
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
+ MIB.addReg(BufReg);
+ MIB.addImm(0);
+ MIB.addImm(16);
+ MIB.addReg(VE::SX11);
+ MIB.setMemRefs(MMOs);
+ }
+
+ // Store BP in buf[3] iff this function is using BP.
+ if (TFI->hasBP(*MF)) {
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
+ MIB.addReg(BufReg);
+ MIB.addImm(0);
+ MIB.addImm(24);
+ MIB.addReg(VE::SX17);
+ MIB.setMemRefs(MMOs);
+ }
+
+ // Store IP in buf[1].
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
+ MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
+ MIB.addImm(0);
+ MIB.addImm(8);
+ MIB.addReg(LabelReg, getKillRegState(true));
+ MIB.setMemRefs(MMOs);
+
+ // Insert setup.
+ MIB =
+ BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
+
+ const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ MIB.addRegMask(RegInfo->getNoPreservedMask());
+ ThisMBB->addSuccessor(MainMBB);
+ ThisMBB->addSuccessor(RestoreMBB);
+
+ // MainMBB:
+ BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0);
+ MainMBB->addSuccessor(SinkMBB);
+
+ // SinkMBB:
+ BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
+ .addReg(MainDestReg)
+ .addMBB(MainMBB)
+ .addReg(RestoreDestReg)
+ .addMBB(RestoreMBB);
+
+ // RestoreMBB:
+ // Restore BP from buf[3] iff this function is using BP. The address of
+ // buf is in SX10.
+ // FIXME: Better to not use SX10 here
+ if (TFI->hasBP(*MF)) {
+ MachineInstrBuilder MIB =
+ BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
+ MIB.addReg(VE::SX10);
+ MIB.addImm(0);
+ MIB.addImm(24);
+ MIB.setMemRefs(MMOs);
+ }
+ BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1);
+ BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
+ RestoreMBB->addSuccessor(SinkMBB);
+
+ MI.eraseFromParent();
+ return SinkMBB;
+}
+
MachineBasicBlock *
VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
@@ -2626,6 +2769,8 @@ VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitEHSjLjLongJmp(MI, BB);
case VE::EH_SjLj_SetJmp:
return emitEHSjLjSetJmp(MI, BB);
+ case VE::SetJmp:
+ return emitSetJmp(MI, BB);
case VE::EH_SjLj_Setup_Dispatch:
return emitSjLjDispatchBlock(MI, BB);
}
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 487804194757e..d3bcc0bf46a6c 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -190,6 +190,7 @@ class VETargetLowering : public TargetLowering {
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
@@ -219,6 +220,8 @@ class VETargetLowering : public TargetLowering {
MachineBasicBlock *MBB) const;
MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
+ MachineBasicBlock *emitSetJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
MachineBasicBlock *emitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *BB) const;
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 9869f95ae5661..fdffa9a434201 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -465,6 +465,10 @@ def VEeh_sjlj_setjmp: SDNode<"VEISD::EH_SJLJ_SETJMP",
SDTypeProfile<1, 1, [SDTCisInt<0>,
SDTCisPtrTy<1>]>,
[SDNPHasChain, SDNPSideEffect]>;
+def VEsetjmp: SDNode<"VEISD::SETJMP", // DAG node selected by the SetJmp pseudo
+ SDTypeProfile<1, 1, [SDTCisInt<0>, // one result, constrained to an integer type
+ SDTCisPtrTy<1>]>, // one operand, constrained to the pointer type
+ [SDNPHasChain, SDNPSideEffect]>; // chained and side-effecting, same properties as VEeh_sjlj_setjmp
def VEeh_sjlj_longjmp: SDNode<"VEISD::EH_SJLJ_LONGJMP",
SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPSideEffect]>;
@@ -1917,6 +1921,9 @@ let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
def EH_SjLj_SetJmp : Pseudo<(outs I32:$dst), (ins I64:$buf),
"# EH_SJLJ_SETJMP",
[(set I32:$dst, (VEeh_sjlj_setjmp I64:$buf))]>;
+ def SetJmp : Pseudo<(outs I32:$dst), (ins I64:$buf), // pseudo matched from VEsetjmp; expanded by VETargetLowering::emitSetJmp
+ "# SETJMP", // placeholder asm string for the pseudo
+ [(set I32:$dst, (VEsetjmp I64:$buf))]>; // $dst = i32 result, $buf = 64-bit buffer address
def EH_SjLj_Setup_Dispatch : Pseudo<(outs), (ins), "# EH_SJLJ_SETUP_DISPATCH",
[(VEeh_sjlj_setup_dispatch)]>;
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index be95168f2de00..bbef3ed46c213 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -3035,6 +3035,7 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
+ Parent->getOpcode() != X86ISD::SETJMP && // setjmp
Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
unsigned AddrSpace =
cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f0e3bd3cebd66..137f8da87deec 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -510,6 +510,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
// LLVM/Clang supports zero-cost DWARF and SEH exception handling.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
@@ -28913,6 +28914,18 @@ SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
Op.getOperand(0), Op.getOperand(1));
}
+SDValue X86TargetLowering::lowerSETJMP(SDValue Op, // Custom lowering: rewrites the generic ISD::SETJMP node as X86ISD::SETJMP.
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ if (!Subtarget.is64Bit()) { // 32-bit only: request the global base register now; the result is deliberately unused here
+ const X86InstrInfo *TII = Subtarget.getInstrInfo();
+ (void)TII->getGlobalBaseReg(&DAG.getMachineFunction()); // emitSetJmp's LEA32r path asks for the same register again later
+ }
+ return DAG.getNode(X86ISD::SETJMP, DL, // same shape as the node built by lowerEH_SJLJ_SETJMP
+ DAG.getVTList(MVT::i32, MVT::Other), // results: i32 value plus a chain
+ Op.getOperand(0), Op.getOperand(1)); // operands forwarded unchanged (presumably chain and buffer pointer -- confirm)
+}
+
SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -34209,6 +34222,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::SETJMP: return lowerSETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::EH_SJLJ_SETUP_DISPATCH:
return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
@@ -37578,6 +37592,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MIB.addMBB(restoreMBB);
MIB.setMemRefs(MMOs);
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+
if (MF->getFunction().getParent()->getModuleFlag("cf-protection-return")) {
emitSetJmpShadowStackFix(MI, thisMBB);
}
@@ -37586,7 +37602,6 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::EH_SjLj_Setup))
.addMBB(restoreMBB);
- const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MIB.addRegMask(RegInfo->getNoPreservedMask());
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(restoreMBB);
@@ -37623,6 +37638,165 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
return sinkMBB;
}
+// Expand the @llvm.setjmp pseudo (SetJmp32/SetJmp64). Like emitEHSjLjSetJmp, but the backend is
+// responsible for storing FP (buf[0], only when the function has a frame pointer) and SP (buf[2]) into the
+// buffer (the frontend does not emit @llvm.frameaddress / @llvm.stacksave stores). Returns the sink block.
+MachineBasicBlock *
+X86TargetLowering::emitSetJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ const MIMetadata MIMD(MI);
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = ++MBB->getIterator(); // insertion point: the block position right after MBB
+
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands()); // copied so each store built below can reuse them
+
+ unsigned MemOpndSlot = 0;
+ unsigned CurOp = 0;
+
+ Register DstReg = MI.getOperand(CurOp++).getReg(); // operand 0 is the result register
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
+ (void)TRI; // TRI is otherwise only referenced by the assert above
+ Register mainDstReg = MRI.createVirtualRegister(RC); // PHI input coming from mainMBB (value 0)
+ Register restoreDstReg = MRI.createVirtualRegister(RC); // PHI input coming from restoreMBB (value 1)
+
+ MemOpndSlot = CurOp; // the buffer's address operands start right after the result operand
+
+ MVT PVT = getPointerTy(MF->getDataLayout());
+ assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+ MF->push_back(restoreMBB); // restoreMBB is appended at the end of the function
+ restoreMBB->setMachineBlockAddressTaken(); // its address is what gets stored into buf[1] below
+
+ MachineInstrBuilder MIB;
+
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end()); // everything after MI moves into sinkMBB
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); // sinkMBB takes over MBB's successors
+
+ // thisMBB: stores the resume address, FP and SP into the buffer, then emits EH_SjLj_Setup.
+ unsigned PtrStoreOpc = 0;
+ Register LabelReg;
+ const int64_t LabelOffset = 1 * PVT.getStoreSize(); // byte offset of buf[1]
+ bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
+ !isPositionIndependent(); // immediate form only for small code model and non-PIC
+
+ // Prepare IP either in reg or imm.
+ if (!UseImmLabel) {
+ PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ LabelReg = MRI.createVirtualRegister(PtrRC);
+ if (Subtarget.is64Bit()) {
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA64r), LabelReg) // RIP-relative LEA of restoreMBB
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addMBB(restoreMBB)
+ .addReg(0);
+ } else {
+ const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA32r), LabelReg) // LEA of restoreMBB relative to the global base register
+ .addReg(XII->getGlobalBaseReg(MF))
+ .addImm(0)
+ .addReg(0)
+ .addMBB(restoreMBB, Subtarget.classifyBlockAddressReference())
+ .addReg(0);
+ }
+ } else
+ PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi; // store-immediate opcodes; the block address is the immediate
+
+ // Store IP to buf[1].
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrStoreOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { // replay the address operands of the pseudo
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset); // displacement gets the buf[1] offset added
+ else
+ MIB.add(MI.getOperand(MemOpndSlot + i));
+ }
+ if (!UseImmLabel)
+ MIB.addReg(LabelReg);
+ else
+ MIB.addMBB(restoreMBB);
+ MIB.setMemRefs(MMOs);
+
+ // Store FP to buf[0] and SP to buf[2].
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ unsigned RegStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
+
+ bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF); // queried at the time the pseudo is expanded
+ if (HasFP) { // without a frame pointer no FP store is emitted and buf[0] is left untouched
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(RegStoreOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.add(MI.getOperand(MemOpndSlot + i)); // unmodified address operands, i.e. buf[0]
+ MIB.addReg(RegInfo->getFrameRegister(*MF));
+ MIB.setMemRefs(MMOs);
+ }
+
+ const int64_t SPOffset = 2 * PVT.getStoreSize(); // byte offset of buf[2]
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(RegStoreOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI.getOperand(MemOpndSlot + i), SPOffset);
+ else
+ MIB.add(MI.getOperand(MemOpndSlot + i));
+ }
+ MIB.addReg(RegInfo->getStackRegister()); // the SP store is unconditional
+ MIB.setMemRefs(MMOs);
+
+ if (MF->getFunction().getParent()->getModuleFlag("cf-protection-return")) { // only when the module carries this flag
+ emitSetJmpShadowStackFix(MI, thisMBB); // NOTE(review): presumably records the shadow stack pointer -- confirm against that helper
+ }
+
+ // Setup: EH_SjLj_Setup names restoreMBB and carries the register mask added below.
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::EH_SjLj_Setup))
+ .addMBB(restoreMBB);
+
+ MIB.addRegMask(RegInfo->getNoPreservedMask()); // mask from getNoPreservedMask(), same as emitEHSjLjSetJmp uses
+ thisMBB->addSuccessor(mainMBB);
+ thisMBB->addSuccessor(restoreMBB);
+
+ // mainMBB: the direct path; the result is 0.
+ BuildMI(mainMBB, MIMD, TII->get(X86::MOV32r0), mainDstReg);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB: merge the two possible results into DstReg.
+ BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg)
+ .addReg(mainDstReg)
+ .addMBB(mainMBB)
+ .addReg(restoreDstReg)
+ .addMBB(restoreMBB);
+
+ // restoreMBB: the block whose address was stored in buf[1]; the result is 1.
+ if (RegInfo->hasBasePointer(*MF)) { // functions using a base pointer reload it first
+ const bool Uses64BitFramePtr = Subtarget.isTarget64BitLP64();
+ X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
+ X86FI->setRestoreBasePointer(MF); // NOTE(review): presumably makes frame lowering save the base pointer -- confirm
+ Register FramePtr = RegInfo->getFrameRegister(*MF);
+ Register BasePtr = RegInfo->getBaseRegister();
+ unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
+ addRegOffset(BuildMI(restoreMBB, MIMD, TII->get(Opm), BasePtr), // BasePtr = load [FramePtr + RestoreBasePointerOffset]
+ FramePtr, true, X86FI->getRestoreBasePointerOffset())
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ BuildMI(restoreMBB, MIMD, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
+ BuildMI(restoreMBB, MIMD, TII->get(X86::JMP_1)).addMBB(sinkMBB);
+ restoreMBB->addSuccessor(sinkMBB);
+
+ MI.eraseFromParent(); // the pseudo is fully replaced by the code built above
+ return sinkMBB; // block that now holds the instructions that followed MI
+}
+
/// Fix the shadow stack using the previously saved SSP pointer.
/// \sa emitSetJmpShadowStackFix
/// \param [in] MI The temporary Machine Instruction for the builtin.
@@ -38408,6 +38582,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::EH_SjLj_SetJmp64:
return emitEHSjLjSetJmp(MI, BB);
+ case X86::SetJmp32:
+ case X86::SetJmp64:
+ return emitSetJmp(MI, BB);
+
case X86::EH_SjLj_LongJmp32:
case X86::EH_SjLj_LongJmp64:
return emitEHSjLjLongJmp(MI, BB);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 5c7c54cacd239..1274089fea7da 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -823,6 +823,7 @@ namespace llvm {
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
@@ -928,6 +929,8 @@ namespace llvm {
MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
+ MachineBasicBlock *emitSetJmp(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
void emitSetJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *MBB) const;
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index bc05dae7351bb..847d141d1ce90 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -214,6 +214,14 @@ let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
"#EH_SJLJ_SETJMP64",
[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
Requires<[In64BitMode]>;
+ def SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf), // pseudo for X86setjmp outside 64-bit mode
+ "#SETJMP32",
+ [(set GR32:$dst, (X86setjmp addr:$buf))]>, // $buf is matched as a full memory address (addr)
+ Requires<[Not64BitMode]>;
+ def SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf), // 64-bit variant; the result is still a 32-bit register
+ "#SETJMP64",
+ [(set GR32:$dst, (X86setjmp addr:$buf))]>, // both variants are expanded by X86TargetLowering::emitSetJmp
+ Requires<[In64BitMode]>;
let isTerminator = 1 in {
def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),
"#EH_SJLJ_LONGJMP32",
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index 3cd05ab0351bd..fe59fbfc6201b 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -352,6 +352,10 @@ def X86eh_sjlj_setjmp : SDNode<"X86ISD::EH_SJLJ_SETJMP",
SDTypeProfile<1, 1, [SDTCisInt<0>,
SDTCisPtrTy<1>]>,
[SDNPHasChain, SDNPSideEffect]>;
+def X86setjmp : SDNode<"X86ISD::SETJMP", // DAG node matched by the SetJmp32/SetJmp64 pseudos
+ SDTypeProfile<1, 1, [SDTCisInt<0>, // one result, constrained to an integer type
+ SDTCisPtrTy<1>]>, // one operand, constrained to the pointer type
+ [SDNPHasChain, SDNPSideEffect]>; // chained and side-effecting, same properties as X86eh_sjlj_setjmp
// SjLj exception handling longjmp.
def X86eh_sjlj_longjmp : SDNode<"X86ISD::EH_SJLJ_LONGJMP",
diff --git a/llvm/test/CodeGen/ARM/setjmp.ll b/llvm/test/CodeGen/ARM/setjmp.ll
new file mode 100644
index 0000000000000..e322bbe4d5d12
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/setjmp.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck --check-prefix=ARM %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck --check-prefix=THUMB2 %s
+
+; Verify that @llvm.setjmp produces the same FP/SP stores as the old pattern
+; of @llvm.frameaddress + @llvm.stacksave + stores + @llvm.eh.sjlj.setjmp.
+
+@buf = internal global [5 x ptr] zeroinitializer
+
+; --- Old pattern (frameaddress + stacksave + stores + eh.sjlj.setjmp) ---
+
+declare ptr @llvm.frameaddress(i32) nounwind readnone
+declare ptr @llvm.stacksave() nounwind
+declare i32 @llvm.eh.sjlj.setjmp(ptr) nounwind
+
+define i32 @old_setjmp() nounwind "frame-pointer"="all" {
+ %fp = call ptr @llvm.frameaddress(i32 0)
+ store ptr %fp, ptr @buf, align 16
+ %sp = call ptr @llvm.stacksave()
+ store ptr %sp, ptr getelementptr inbounds ([5 x ptr], ptr @buf, i64 0, i64 2), align 16
+ %r = call i32 @llvm.eh.sjlj.setjmp(ptr @buf)
+ ret i32 %r
+}
+
+; --- New pattern (@llvm.setjmp) ---
+
+declare i32 @llvm.setjmp(ptr) nounwind
+
+define i32 @new_setjmp() nounwind "frame-pointer"="all" {
+ %r = call i32 @llvm.setjmp(ptr @buf)
+ ret i32 %r
+}
+
+; Both functions should store FP (r7) to buf[0] and SP to buf[2].
+
+; ARM-LABEL: _old_setjmp:
+; ARM: str r7, [r0]
+; ARM: str sp, [r0, #8]
+; ARM-LABEL: _new_setjmp:
+; ARM: str r7, [r0]
+; ARM: str sp, [r0, #8]
+
+; THUMB2-LABEL: _old_setjmp:
+; THUMB2: str r7, [r0]
+; THUMB2: str.w sp, [r0, #8]
+; THUMB2-LABEL: _new_setjmp:
+; THUMB2: str r7, [r0]
+; THUMB2: str.w sp, [r0, #8]
diff --git a/llvm/test/CodeGen/PowerPC/setjmp.ll b/llvm/test/CodeGen/PowerPC/setjmp.ll
new file mode 100644
index 0000000000000..f0b86a790c488
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/setjmp.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -verify-machineinstrs | FileCheck %s
+
+; Verify that @llvm.setjmp produces the same FP/SP stores as the old pattern
+; of @llvm.frameaddress + @llvm.stacksave + stores + @llvm.eh.sjlj.setjmp.
+
+@buf = internal global [5 x ptr] zeroinitializer, align 8
+
+; --- Old pattern (frameaddress + stacksave + stores + eh.sjlj.setjmp) ---
+
+declare ptr @llvm.frameaddress(i32) nounwind readnone
+declare ptr @llvm.stacksave() nounwind
+declare i32 @llvm.eh.sjlj.setjmp(ptr) nounwind
+
+define i32 @old_setjmp() nounwind "frame-pointer"="all" {
+ %fp = call ptr @llvm.frameaddress(i32 0)
+ store ptr %fp, ptr @buf, align 8
+ %sp = call ptr @llvm.stacksave()
+ store ptr %sp, ptr getelementptr inbounds (ptr, ptr @buf, i64 2), align 8
+ %r = call i32 @llvm.eh.sjlj.setjmp(ptr @buf)
+ ret i32 %r
+}
+
+; --- New pattern (@llvm.setjmp) ---
+
+declare i32 @llvm.setjmp(ptr) nounwind
+
+define i32 @new_setjmp() nounwind "frame-pointer"="all" {
+ %r = call i32 @llvm.setjmp(ptr @buf)
+ ret i32 %r
+}
+
+; Both functions should store FP (r31) to buf[0] and SP (r1) to buf[2] (offset 16).
+
+; CHECK-LABEL: old_setjmp:
+; CHECK: std 31, buf@toc@l(
+; CHECK: std 1, 16(
+; CHECK-LABEL: new_setjmp:
+; CHECK: std 31, 0(
+; CHECK: std 1, 16(
diff --git a/llvm/test/CodeGen/SystemZ/setjmp.ll b/llvm/test/CodeGen/SystemZ/setjmp.ll
new file mode 100644
index 0000000000000..01c78e58aff2d
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/setjmp.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Verify that @llvm.setjmp produces the same IP/SP stores as the old pattern
+; of @llvm.frameaddress + @llvm.stacksave + stores + @llvm.eh.sjlj.setjmp.
+; SystemZ's eh.sjlj.setjmp already stores IP and SP internally, so
+; the old and new patterns should produce identical output.
+
+@buf = global [20 x ptr] zeroinitializer, align 8
+
+; --- Old pattern (eh.sjlj.setjmp, which already stores IP+SP on SystemZ) ---
+
+declare i32 @llvm.eh.sjlj.setjmp(ptr) nounwind
+
+define void @old_setjmp() nounwind {
+ %r = tail call i32 @llvm.eh.sjlj.setjmp(ptr nonnull @buf)
+ ret void
+}
+
+; --- New pattern (@llvm.setjmp) ---
+
+declare i32 @llvm.setjmp(ptr) nounwind
+
+define void @new_setjmp() nounwind {
+ %r = tail call i32 @llvm.setjmp(ptr nonnull @buf)
+ ret void
+}
+
+; Both should store IP to buf[1] (offset 8) and SP to buf[3] (offset 24).
+
+; CHECK-LABEL: old_setjmp:
+; CHECK: stg %r0, 8(%r1)
+; CHECK: stg %r15, 24(%r1)
+; CHECK-LABEL: new_setjmp:
+; CHECK: stg %r0, 8(%r1)
+; CHECK: stg %r15, 24(%r1)
diff --git a/llvm/test/CodeGen/VE/Scalar/setjmp.ll b/llvm/test/CodeGen/VE/Scalar/setjmp.ll
new file mode 100644
index 0000000000000..24747bacbbfea
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Scalar/setjmp.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=ve | FileCheck %s
+
+; Verify that @llvm.setjmp produces the same FP/SP stores as the old pattern
+; of @llvm.frameaddress + @llvm.stacksave + stores + @llvm.eh.sjlj.setjmp.
+
+@buf = common global [1 x [25 x i64]] zeroinitializer, align 8
+
+; --- Old pattern (frameaddress + stacksave + stores + eh.sjlj.setjmp) ---
+
+declare ptr @llvm.frameaddress(i32) nounwind readnone
+declare ptr @llvm.stacksave() nounwind
+declare i32 @llvm.eh.sjlj.setjmp(ptr) nounwind
+
+define i32 @old_setjmp() nounwind "frame-pointer"="all" {
+ %fp = call ptr @llvm.frameaddress(i32 0)
+ store ptr %fp, ptr @buf, align 8
+ %sp = call ptr @llvm.stacksave()
+ store ptr %sp, ptr getelementptr inbounds (ptr, ptr @buf, i64 2), align 8
+ %r = call i32 @llvm.eh.sjlj.setjmp(ptr @buf)
+ ret i32 %r
+}
+
+; --- New pattern (@llvm.setjmp) ---
+
+declare i32 @llvm.setjmp(ptr) nounwind
+
+define i32 @new_setjmp() nounwind "frame-pointer"="all" {
+ %r = call i32 @llvm.setjmp(ptr @buf)
+ ret i32 %r
+}
+
+; Both functions should store FP (s9) to buf[0] and SP (s11) to buf[2].
+
+; CHECK-LABEL: old_setjmp:
+; CHECK: st %s9, (, %s0)
+; CHECK: st %s11, 16(, %s0)
+; CHECK-LABEL: new_setjmp:
+; CHECK: st %s9, (, %s0)
+; CHECK: st %s11, 16(, %s0)
diff --git a/llvm/test/CodeGen/X86/setjmp.ll b/llvm/test/CodeGen/X86/setjmp.ll
new file mode 100644
index 0000000000000..1e4b195949576
--- /dev/null
+++ b/llvm/test/CodeGen/X86/setjmp.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck --check-prefix=X86 %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck --check-prefix=X64 %s
+; RUN: llc < %s -mtriple=x86_64-windows-gnu | FileCheck --check-prefix=WIN64 %s
+
+; Verify that @llvm.setjmp produces the same output as the old pattern of
+; @llvm.frameaddress + @llvm.stacksave + stores + @llvm.eh.sjlj.setjmp.
+
+@buf = internal global [5 x ptr] zeroinitializer
+
+; --- Old pattern (frameaddress + stacksave + stores + eh.sjlj.setjmp) ---
+
+declare ptr @llvm.frameaddress(i32) nounwind readnone
+declare ptr @llvm.stacksave() nounwind
+declare i32 @llvm.eh.sjlj.setjmp(ptr) nounwind
+
+define i32 @old_setjmp() nounwind "frame-pointer"="all" {
+ %fp = tail call ptr @llvm.frameaddress(i32 0)
+ store ptr %fp, ptr @buf, align 16
+ %sp = tail call ptr @llvm.stacksave()
+ store ptr %sp, ptr getelementptr inbounds ([5 x ptr], ptr @buf, i64 0, i64 2), align 16
+ %r = tail call i32 @llvm.eh.sjlj.setjmp(ptr @buf)
+ ret i32 %r
+}
+
+; --- New pattern (@llvm.setjmp) ---
+
+declare i32 @llvm.setjmp(ptr) nounwind
+
+define i32 @new_setjmp() nounwind "frame-pointer"="all" {
+ %r = tail call i32 @llvm.setjmp(ptr @buf)
+ ret i32 %r
+}
+
+; Both functions should store FP to buf[0], SP to buf[2], IP to buf[1].
+
+; X86-LABEL: old_setjmp:
+; X86: movl %ebp, buf
+; X86: movl %esp, buf+8
+; X86-LABEL: new_setjmp:
+; X86: movl %ebp, buf
+; X86: movl %esp, buf+8
+
+; X64-LABEL: old_setjmp:
+; X64: movq %rbp, buf(%rip)
+; X64: movq %rsp, buf+16(%rip)
+; X64-LABEL: new_setjmp:
+; X64: movq %rbp, buf(%rip)
+; X64: movq %rsp, buf+16(%rip)
+
+; On WIN64, the old pattern stores an adjusted address from @llvm.frameaddress
+; (which is wrong on WindowsCFI targets). The new @llvm.setjmp stores %rbp
+; directly, which is the correct fix.
+; WIN64-LABEL: new_setjmp:
+; WIN64: movq %rbp, buf(%rip)
+; WIN64: movq %rsp, buf+16(%rip)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 135d1e4007d49..ad5212fcfda45 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -230,11 +230,22 @@ class ROCDL_SpecialIdRegisterOp<string mnemonic> :
// ROCDL vector types definitions
//===----------------------------------------------------------------------===//
+class ROCDL_NamedType<string name> { // mixin that carries the textual spelling of a type
+ string typeName = name; // read by ROCDL_Mfma_IntrOp when it assembles its description example
+}
+
class ROCDL_ConcreteVector<Type elem, int length> :
FixedVectorOfLengthAndType<[length], [elem]>,
BuildableType<
"::mlir::VectorType::get({" # length # "} ,"
- # elem.builderCall # ")">;
+ # elem.builderCall # ")">,
+ ROCDL_NamedType<"vector<" # length # "x"
+ # !tolower(!cast<string>(elem)) # ">">;
+
+class ROCDL_Scalar<Type elem> : // wraps an existing scalar Type so that it also carries a typeName
+ Type<elem.predicate, elem.summary>, // same predicate and summary as the wrapped type
+ BuildableType<elem.builderCall>, // same builder call as the wrapped type
+ ROCDL_NamedType<!tolower(!cast<string>(elem))>; // typeName is the lower-cased record name of elem
def ROCDL_V2I16Type : ROCDL_ConcreteVector<I16, 2>;
def ROCDL_V2F16Type : ROCDL_ConcreteVector<F16, 2>;
@@ -925,7 +936,7 @@ def ROCDL_IglpOpt : ROCDL_ConcreteNonMemIntrOp<"iglp.opt", [], 0, [0], ["variant
//===---------------------------------------------------------------------===//
// Xdlops intrinsics
-class ROCDL_Mfma_IntrOp<string mnemonic, Type ABType, Type CDType> :
+class ROCDL_Mfma_IntrOp<string mnemonic, ROCDL_NamedType ABType, ROCDL_NamedType CDType> :
ROCDL_IntrOp<mnemonic, [], [], [], 1, 0, 0, 0, [3, 4, 5], ["cbsz", "abid", "blgp"]>,
Arguments<(ins
ABType:$a,
@@ -945,19 +956,10 @@ class ROCDL_Mfma_IntrOp<string mnemonic, Type ABType, Type CDType> :
Example:
```mlir
- // MFMA with f32 inputs and 32-wide f32 accumulator.
- %r0 = rocdl.mfma.f32.32x32x1f32 %a0, %b0, %c0, 0, 0, 0 :
- (f32, f32, vector<32xf32>) -> vector<32xf32>
-
- // MFMA with i8 inputs and 32-wide i32 accumulator.
- %r1 = rocdl.mfma.i32.32x32x4i8 %a1, %a1, %c1, 0, 0, 0 :
- (i32, i32, vector<32xi32>) -> vector<32xi32>
-
- // MFMA with bf16 inputs and 32-wide f32 accumulator.
- %r2 = rocdl.mfma.f32.32x32x2bf16 %a2, %a2, %c0, 0, 0, 0 :
- (vector<2xi16>, vector<2xi16>, vector<32xf32>) -> vector<32xf32>
- ```
- }];
+ %r0 = rocdl.}] # mnemonic # [{ %a0, %b0, %c0, 0, 0, 0 : (}] # ABType.typeName
+ # [{, }] # ABType.typeName # [{, }] # CDType.typeName # [{) -> }]
+ # CDType.typeName # [{
+ ```}];
}
class ROCDL_Mfma_Scale_IntrOp<string mnemonic, Type AB, Type CD> :
@@ -1038,21 +1040,21 @@ class ROCDL_Smfmac_IntrOp<string mnemonic, Type AType, Type BType, Type CDType>
}
// Available on all CDNA.
-def ROCDL_mfma_f32_32x32x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x1f32", /*Type AB=*/F32, /*Type CD=*/ROCDL_ConcreteVector<F32, 32>>;
-def ROCDL_mfma_f32_16x16x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x1f32", F32, ROCDL_ConcreteVector<F32, 16>>;
-def ROCDL_mfma_f32_4x4x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x1f32", F32, ROCDL_ConcreteVector<F32, 4>>;
-def ROCDL_mfma_f32_32x32x2f32 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x2f32", F32, ROCDL_ConcreteVector<F32, 16>>;
-def ROCDL_mfma_f32_16x16x4f32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x4f32", F32, ROCDL_ConcreteVector<F32, 4>>;
+def ROCDL_mfma_f32_32x32x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x1f32", /*Type AB=*/ROCDL_Scalar<F32>, /*Type CD=*/ROCDL_ConcreteVector<F32, 32>>;
+def ROCDL_mfma_f32_16x16x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x1f32", ROCDL_Scalar<F32>, ROCDL_ConcreteVector<F32, 16>>;
+def ROCDL_mfma_f32_4x4x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x1f32", ROCDL_Scalar<F32>, ROCDL_ConcreteVector<F32, 4>>;
+def ROCDL_mfma_f32_32x32x2f32 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x2f32", ROCDL_Scalar<F32>, ROCDL_ConcreteVector<F32, 16>>;
+def ROCDL_mfma_f32_16x16x4f32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x4f32", ROCDL_Scalar<F32>, ROCDL_ConcreteVector<F32, 4>>;
def ROCDL_mfma_f32_32x32x4f16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x4f16", ROCDL_ConcreteVector<F16, 4>, ROCDL_ConcreteVector<F32, 32>>;
def ROCDL_mfma_f32_16x16x4f16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x4f16", ROCDL_ConcreteVector<F16, 4>, ROCDL_ConcreteVector<F32, 16>>;
def ROCDL_mfma_f32_4x4x4f16 : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x4f16", ROCDL_ConcreteVector<F16, 4>, ROCDL_ConcreteVector<F32, 4>>;
def ROCDL_mfma_f32_32x32x8f16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x8f16", ROCDL_ConcreteVector<F16, 4>, ROCDL_ConcreteVector<F32, 16>>;
def ROCDL_mfma_f32_16x16x16f16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x16f16", ROCDL_ConcreteVector<F16, 4>, ROCDL_ConcreteVector<F32, 4>>;
-def ROCDL_mfma_i32_32x32x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x4i8", I32, ROCDL_ConcreteVector<I32, 32>>;
-def ROCDL_mfma_i32_16x16x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x4i8", I32, ROCDL_ConcreteVector<I32, 16>>;
-def ROCDL_mfma_i32_4x4x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.4x4x4i8", I32, ROCDL_ConcreteVector<I32, 4>>;
-def ROCDL_mfma_i32_32x32x8i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x8i8", I32, ROCDL_ConcreteVector<I32, 16>>;
-def ROCDL_mfma_i32_16x16x16i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x16i8", I32, ROCDL_ConcreteVector<I32, 4>>;
+def ROCDL_mfma_i32_32x32x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x4i8", ROCDL_Scalar<I32>, ROCDL_ConcreteVector<I32, 32>>;
+def ROCDL_mfma_i32_16x16x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x4i8", ROCDL_Scalar<I32>, ROCDL_ConcreteVector<I32, 16>>;
+def ROCDL_mfma_i32_4x4x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.4x4x4i8", ROCDL_Scalar<I32>, ROCDL_ConcreteVector<I32, 4>>;
+def ROCDL_mfma_i32_32x32x8i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x8i8", ROCDL_Scalar<I32>, ROCDL_ConcreteVector<I32, 16>>;
+def ROCDL_mfma_i32_16x16x16i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x16i8", ROCDL_Scalar<I32>, ROCDL_ConcreteVector<I32, 4>>;
def ROCDL_mfma_f32_32x32x2bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x2bf16", ROCDL_ConcreteVector<I16, 2>, ROCDL_ConcreteVector<F32, 32>>;
def ROCDL_mfma_f32_16x16x2bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x2bf16", ROCDL_ConcreteVector<I16, 2>, ROCDL_ConcreteVector<F32, 16>>;
def ROCDL_mfma_f32_4x4x2bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x2bf16", ROCDL_ConcreteVector<I16, 2>, ROCDL_ConcreteVector<F32, 4>>;
@@ -1066,21 +1068,21 @@ def ROCDL_mfma_f32_32x32x8bf16_1k : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x8bf16.1k",
def ROCDL_mfma_f32_16x16x16bf16_1k : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x16bf16.1k", ROCDL_ConcreteVector<I16, 4>, ROCDL_ConcreteVector<F32, 4>>;
// Note: in gfx94x, unlike in gfx90a, the f64 xdlops use the "blgp" argument as
// a NEG bitfield. See IntrinsicsAMDGPU.td for more info.
-def ROCDL_mfma_f64_16x16x4f64 : ROCDL_Mfma_IntrOp<"mfma.f64.16x16x4f64", F64, ROCDL_ConcreteVector<F64, 4>>;
-def ROCDL_mfma_f64_4x4x4f64 : ROCDL_Mfma_IntrOp<"mfma.f64.4x4x4f64", F64, F64>;
+def ROCDL_mfma_f64_16x16x4f64 : ROCDL_Mfma_IntrOp<"mfma.f64.16x16x4f64", ROCDL_Scalar<F64>, ROCDL_ConcreteVector<F64, 4>>;
+def ROCDL_mfma_f64_4x4x4f64 : ROCDL_Mfma_IntrOp<"mfma.f64.4x4x4f64", ROCDL_Scalar<F64>, ROCDL_Scalar<F64>>;
// New in gfx94x.
-def ROCDL_mfma_i32_16x16x32_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x32.i8", I64, ROCDL_ConcreteVector<I32, 4>>;
-def ROCDL_mfma_i32_32x32x16_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x16.i8", I64, ROCDL_ConcreteVector<I32, 16>>;
+def ROCDL_mfma_i32_16x16x32_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x32.i8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<I32, 4>>;
+def ROCDL_mfma_i32_32x32x16_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x16.i8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<I32, 16>>;
def ROCDL_mfma_f32_16x16x8_xf32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x8.xf32", ROCDL_ConcreteVector<F32, 2>, ROCDL_ConcreteVector<F32, 4>>;
def ROCDL_mfma_f32_32x32x4_xf32 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x4.xf32", ROCDL_ConcreteVector<F32, 2>, ROCDL_ConcreteVector<F32, 16>>;
-def ROCDL_mfma_f32_16x16x32_bf8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf8.bf8", I64, ROCDL_ConcreteVector<F32, 4>>;
-def ROCDL_mfma_f32_16x16x32_bf8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf8.fp8", I64, ROCDL_ConcreteVector<F32, 4>>;
-def ROCDL_mfma_f32_16x16x32_fp8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.fp8.bf8", I64, ROCDL_ConcreteVector<F32, 4>>;
-def ROCDL_mfma_f32_16x16x32_fp8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.fp8.fp8", I64, ROCDL_ConcreteVector<F32, 4>>;
-def ROCDL_mfma_f32_32x32x16_bf8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf8.bf8", I64, ROCDL_ConcreteVector<F32, 16>>;
-def ROCDL_mfma_f32_32x32x16_bf8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf8.fp8", I64, ROCDL_ConcreteVector<F32, 16>>;
-def ROCDL_mfma_f32_32x32x16_fp8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.fp8.bf8", I64, ROCDL_ConcreteVector<F32, 16>>;
-def ROCDL_mfma_f32_32x32x16_fp8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.fp8.fp8", I64, ROCDL_ConcreteVector<F32, 16>>;
+def ROCDL_mfma_f32_16x16x32_bf8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf8.bf8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 4>>;
+def ROCDL_mfma_f32_16x16x32_bf8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf8.fp8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 4>>;
+def ROCDL_mfma_f32_16x16x32_fp8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.fp8.bf8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 4>>;
+def ROCDL_mfma_f32_16x16x32_fp8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.fp8.fp8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 4>>;
+def ROCDL_mfma_f32_32x32x16_bf8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf8.bf8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 16>>;
+def ROCDL_mfma_f32_32x32x16_bf8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf8.fp8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 16>>;
+def ROCDL_mfma_f32_32x32x16_fp8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.fp8.bf8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 16>>;
+def ROCDL_mfma_f32_32x32x16_fp8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.fp8.fp8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 16>>;
// New in gfx950.
def ROCDL_mfma_f32_16x16x32_bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf16", ROCDL_ConcreteVector<BF16, 8>, ROCDL_ConcreteVector<F32, 4>>;
def ROCDL_mfma_i32_16x16x64_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x64.i8", ROCDL_ConcreteVector<I32, 4>, ROCDL_ConcreteVector<I32, 4>>;
More information about the Mlir-commits
mailing list