[Mlir-commits] [clang] [llvm] [mlir] Delegate __builtin_setjmp FP save to backend on windows CFI targets (PR #186843)

Pyry Kovanen llvmlistbot at llvm.org
Wed Mar 18 06:30:00 PDT 2026


https://github.com/pkova updated https://github.com/llvm/llvm-project/pull/186843

>From c62583add00fb5e6a29661471ff77992fd3085e6 Mon Sep 17 00:00:00 2001
From: pkova <pyry at urbit.org>
Date: Wed, 18 Mar 2026 15:26:45 +0200
Subject: [PATCH] Add @llvm.setjmp intrinsic to store FP, IP and SP on the
 backend

---
 clang/lib/CodeGen/CGBuiltin.cpp               |  26 +--
 .../CodeGen/SystemZ/builtin-setjmp-logjmp.c   |   2 +-
 clang/test/Sema/builtin-longjmp.c             |   2 +-
 llvm/include/llvm/CodeGen/ISDOpcodes.h        |   6 +
 llvm/include/llvm/IR/Intrinsics.td            |   2 +
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |   2 +
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  10 +
 .../SelectionDAG/SelectionDAGDumper.cpp       |   1 +
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |  30 +++
 llvm/lib/Target/ARM/ARMISelLowering.h         |   3 +
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 158 +++++++++++++++
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |   3 +
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      |   7 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  11 +-
 .../Target/SystemZ/SystemZISelLowering.cpp    |  12 ++
 llvm/lib/Target/SystemZ/SystemZISelLowering.h |   2 +
 llvm/lib/Target/SystemZ/SystemZInstrInfo.td   |   4 +-
 llvm/lib/Target/SystemZ/SystemZOperators.td   |   2 +
 llvm/lib/Target/VE/VEISelLowering.cpp         | 145 ++++++++++++++
 llvm/lib/Target/VE/VEISelLowering.h           |   3 +
 llvm/lib/Target/VE/VEInstrInfo.td             |   7 +
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp       |   1 +
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 180 +++++++++++++++++-
 llvm/lib/Target/X86/X86ISelLowering.h         |   3 +
 llvm/lib/Target/X86/X86InstrCompiler.td       |   8 +
 llvm/lib/Target/X86/X86InstrFragments.td      |   4 +
 llvm/test/CodeGen/ARM/setjmp.ll               |  47 +++++
 llvm/test/CodeGen/PowerPC/setjmp.ll           |  39 ++++
 llvm/test/CodeGen/SystemZ/setjmp.ll           |  35 ++++
 llvm/test/CodeGen/VE/Scalar/setjmp.ll         |  39 ++++
 llvm/test/CodeGen/X86/setjmp.ll               |  55 ++++++
 mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td  |  76 ++++----
 32 files changed, 858 insertions(+), 67 deletions(-)
 create mode 100644 llvm/test/CodeGen/ARM/setjmp.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/setjmp.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/setjmp.ll
 create mode 100644 llvm/test/CodeGen/VE/Scalar/setjmp.ll
 create mode 100644 llvm/test/CodeGen/X86/setjmp.ll

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index df03e84ce9f81..34aedc111ee4c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4916,32 +4916,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
   }
   case Builtin::BI__builtin_setjmp: {
-    // Buffer is a void**.
     Address Buf = EmitPointerWithAlignment(E->getArg(0));
 
-    if (getTarget().getTriple().getArch() == llvm::Triple::systemz) {
-      // On this target, the back end fills in the context buffer completely.
-      // It doesn't really matter if the frontend stores to the buffer before
-      // calling setjmp, the back-end is going to overwrite them anyway.
-      Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
-      return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
-    }
-
-    // Store the frame pointer to the setjmp buffer.
-    Value *FrameAddr = Builder.CreateCall(
-        CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
-        ConstantInt::get(Int32Ty, 0));
-    Builder.CreateStore(FrameAddr, Buf);
-
-    // Store the stack pointer to the setjmp buffer.
-    Value *StackAddr = Builder.CreateStackSave();
-    assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
-
-    Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
-    Builder.CreateStore(StackAddr, StackSaveSlot);
-
-    // Call LLVM's EH setjmp, which is lightweight.
-    Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
+    // The backend handles all buffer stores (FP, SP, IP) via @llvm.setjmp.
+    Function *F = CGM.getIntrinsic(Intrinsic::setjmp);
     return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
   }
   case Builtin::BI__builtin_longjmp: {
diff --git a/clang/test/CodeGen/SystemZ/builtin-setjmp-logjmp.c b/clang/test/CodeGen/SystemZ/builtin-setjmp-logjmp.c
index 898891fa182ea..d2522c6bab7df 100644
--- a/clang/test/CodeGen/SystemZ/builtin-setjmp-logjmp.c
+++ b/clang/test/CodeGen/SystemZ/builtin-setjmp-logjmp.c
@@ -6,7 +6,7 @@ void *buf[20];
 // CHECK-LABEL: define dso_local void @foo(
 // CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.eh.sjlj.setjmp(ptr @buf)
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.setjmp(ptr @buf)
 // CHECK-NEXT:    ret void
 //
 void foo()
diff --git a/clang/test/Sema/builtin-longjmp.c b/clang/test/Sema/builtin-longjmp.c
index 99463cf3385a1..b320ec0b6a2d2 100644
--- a/clang/test/Sema/builtin-longjmp.c
+++ b/clang/test/Sema/builtin-longjmp.c
@@ -21,7 +21,7 @@ jmp_buf buf;
 // CHECK:   call{{.*}} void @llvm.eh.sjlj.longjmp
 
 // CHECK:   define{{.*}} void @do_setjmp()
-// CHECK:   call{{.*}} i32 @llvm.eh.sjlj.setjmp
+// CHECK:   call{{.*}} i32 @llvm.setjmp
 
 void do_jump(void) {
   __builtin_longjmp(buf, 1); // expected-error {{__builtin_longjmp is not supported for the current target}}
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index fa578f733d4e8..2a49f30210b2c 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -161,6 +161,12 @@ enum NodeType {
   /// and returns an outchain.
   EH_SJLJ_SETJMP,
 
+  /// RESULT, OUTCHAIN = SETJMP(INCHAIN, buffer)
+  /// This corresponds to the setjmp intrinsic. Like EH_SJLJ_SETJMP but the
+  /// backend is responsible for storing all of FP, SP, and IP into the buffer
+  /// (the frontend does not emit any buffer stores).
+  SETJMP,
+
   /// OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer)
   /// This corresponds to the eh.sjlj.longjmp intrinsic.
   /// It takes an input chain and a pointer to the jump buffer as inputs
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 4469ff155b854..5b309751fc517 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1571,6 +1571,8 @@ def int_eh_sjlj_setjmp          : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
 def int_eh_sjlj_longjmp         : Intrinsic<[], [llvm_ptr_ty], [IntrNoReturn]>;
 def int_eh_sjlj_setup_dispatch  : Intrinsic<[], []>;
 
+def int_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
+
 //===---------------- Generic Variable Attribute Intrinsics----------------===//
 //
 def int_var_annotation : DefaultAttrsIntrinsic<
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5e54343f7f146..7b35825ba995c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1116,6 +1116,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
   case ISD::FRAME_TO_ARGS_OFFSET:
   case ISD::EH_DWARF_CFA:
   case ISD::EH_SJLJ_SETJMP:
+  case ISD::SETJMP:
   case ISD::EH_SJLJ_LONGJMP:
   case ISD::EH_SJLJ_SETUP_DISPATCH:
     // These operations lie about being legal: when they claim to be legal,
@@ -3311,6 +3312,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Results.push_back(Node->getOperand(0));
     break;
   case ISD::EH_SJLJ_SETJMP:
+  case ISD::SETJMP:
     // If the target didn't expand this, just return 'zero' and preserve the
     // chain.
     Results.push_back(DAG.getConstant(0, dl, MVT::i32));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index eb55a68eaba84..8f5ed1c312f26 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6863,6 +6863,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     MFI.setFunctionContextIndex(FI);
     return;
   }
+  case Intrinsic::setjmp: {
+    SDValue Ops[2];
+    Ops[0] = getRoot();
+    Ops[1] = getValue(I.getArgOperand(0));
+    SDValue Op = DAG.getNode(ISD::SETJMP, sdl,
+                             DAG.getVTList(MVT::i32, MVT::Other), Ops);
+    setValue(&I, Op.getValue(0));
+    DAG.setRoot(Op.getValue(1));
+    return;
+  }
   case Intrinsic::eh_sjlj_setjmp: {
     SDValue Ops[2];
     Ops[0] = getRoot();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 7161dd299f830..890545c756351 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -160,6 +160,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::EH_DWARF_CFA:               return "EH_DWARF_CFA";
   case ISD::EH_RETURN:                  return "EH_RETURN";
   case ISD::EH_SJLJ_SETJMP:             return "EH_SJLJ_SETJMP";
+  case ISD::SETJMP:                     return "SETJMP";
   case ISD::EH_SJLJ_LONGJMP:            return "EH_SJLJ_LONGJMP";
   case ISD::EH_SJLJ_SETUP_DISPATCH:     return "EH_SJLJ_SETUP_DISPATCH";
   case ISD::ConstantPool:               return "ConstantPool";
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 970c962197ac0..ef7dce063ab69 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1195,6 +1195,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+  setOperationAction(ISD::SETJMP, MVT::i32, Custom);
   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
   setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
 
@@ -3776,6 +3777,34 @@ ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
                      Op.getOperand(1), Val);
 }
 
+SDValue
+ARMTargetLowering::LowerSETJMP(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  SDValue Chain = Op.getOperand(0);
+  SDValue Buf = Op.getOperand(1);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+  // Store FP into buf[0].
+  const ARMBaseRegisterInfo &ARI =
+      *static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
+  Register FrameReg = ARI.getFrameRegister(MF);
+  SDValue FP = DAG.getCopyFromReg(Chain, dl, FrameReg, PtrVT);
+  Chain = DAG.getStore(FP.getValue(1), dl, FP, Buf, MachinePointerInfo());
+
+  // Store SP into buf[2] (offset 8).
+  SDValue SP = DAG.getCopyFromReg(Chain, dl, ARM::SP, PtrVT);
+  SDValue SPAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Buf,
+                               DAG.getConstant(8, dl, PtrVT));
+  Chain = DAG.getStore(SP.getValue(1), dl, SP, SPAddr, MachinePointerInfo());
+
+  // Delegate to ARMISD::EH_SJLJ_SETJMP for the IP store and return value.
+  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
+  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
+                     DAG.getVTList(MVT::i32, MVT::Other), Chain, Buf, Val);
+}
+
 SDValue
 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -10435,6 +10464,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
   case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
   case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
+  case ISD::SETJMP: return LowerSETJMP(Op, DAG);
   case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
   case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
   case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index e58d872c548e4..314d831a686b8 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -546,6 +546,7 @@ class VectorType;
                                             SDValue Dst,
                                             ISD::ArgFlagsTy Flags) const;
     SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG,
@@ -713,6 +714,8 @@ class VectorType;
 
     void EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *MBB) const;
 
+
+
     MachineBasicBlock *EmitStructByval(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1515ff2e13b85..fcafe7a056122 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -601,6 +601,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   // your own exception handling based on them.
   // LLVM/Clang supports zero-cost DWARF exception handling.
   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+  setOperationAction(ISD::SETJMP, MVT::i32, Custom);
   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
 
   // We want to legalize GlobalAddress and ConstantPool nodes into the
@@ -7985,6 +7986,14 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                      Op.getOperand(0), Op.getOperand(1));
 }
 
+SDValue PPCTargetLowering::lowerSETJMP(SDValue Op,
+                                       SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  return DAG.getNode(PPCISD::SETJMP, DL,
+                     DAG.getVTList(MVT::i32, MVT::Other),
+                     Op.getOperand(0), Op.getOperand(1));
+}
+
 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                 SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -12745,6 +12754,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   // Exception handling lowering.
   case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
+  case ISD::SETJMP:             return lowerSETJMP(Op, DAG);
   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
 
   case ISD::LOAD:               return LowerLOAD(Op, DAG);
@@ -13577,6 +13587,151 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
   return sinkMBB;
 }
 
+MachineBasicBlock *
+PPCTargetLowering::emitSetJmp(MachineInstr &MI,
+                              MachineBasicBlock *MBB) const {
+  DebugLoc DL = MI.getDebugLoc();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+  MachineFunction *MF = MBB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  const BasicBlock *BB = MBB->getBasicBlock();
+  MachineFunction::iterator I = ++MBB->getIterator();
+
+  Register DstReg = MI.getOperand(0).getReg();
+  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
+  Register mainDstReg = MRI.createVirtualRegister(RC);
+  Register restoreDstReg = MRI.createVirtualRegister(RC);
+
+  MVT PVT = getPointerTy(MF->getDataLayout());
+  assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
+
+  MachineBasicBlock *thisMBB = MBB;
+  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+  MF->insert(I, mainMBB);
+  MF->insert(I, sinkMBB);
+
+  MachineInstrBuilder MIB;
+
+  // Transfer the remainder of BB and its successor edges to sinkMBB.
+  sinkMBB->splice(sinkMBB->begin(), MBB,
+                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // Buffer layout:
+  //   buf[0] = Frame Pointer (written only when TFI->needsFP(*MF))
+  //   buf[1] = IP (return address / LR)
+  //   buf[2] = Stack Pointer
+  //   buf[3] = TOC pointer (R2, 64-bit ELF only)
+  //   buf[4] = Base Pointer
+  const int64_t FPOffset = 0;
+  const int64_t LabelOffset = 1 * PVT.getStoreSize();
+  const int64_t SPOffset = 2 * PVT.getStoreSize();
+  const int64_t TOCOffset = 3 * PVT.getStoreSize();
+  const int64_t BPOffset = 4 * PVT.getStoreSize();
+
+  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+  Register LabelReg = MRI.createVirtualRegister(PtrRC);
+  Register BufReg = MI.getOperand(1).getReg();
+
+  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+
+  // Store FP to buf[0] if we have a frame pointer.
+  // Note: hasFP() is unreliable here because it depends on getStackSize()
+  // which isn't known yet during ISel. Use needsFP() instead.
+  auto *TFI = static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
+  if (TFI->needsFP(*MF)) {
+    unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
+    MIB = BuildMI(*thisMBB, MI, DL,
+                  TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
+              .addReg(FP)
+              .addImm(FPOffset)
+              .addReg(BufReg)
+              .cloneMemRefs(MI);
+  }
+
+  // Store SP to buf[2].
+  MIB = BuildMI(*thisMBB, MI, DL,
+                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
+            .addReg(SP)
+            .addImm(SPOffset)
+            .addReg(BufReg)
+            .cloneMemRefs(MI);
+
+  // Store TOC (R2) for 64-bit ELF.
+  if (Subtarget.is64BitELFABI()) {
+    setUsesTOCBasePtr(*MBB->getParent());
+    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
+              .addReg(PPC::X2)
+              .addImm(TOCOffset)
+              .addReg(BufReg)
+              .cloneMemRefs(MI);
+  }
+
+  // Store BP.
+  unsigned BaseReg;
+  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
+    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
+  else
+    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
+
+  MIB = BuildMI(*thisMBB, MI, DL,
+                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
+            .addReg(BaseReg)
+            .addImm(BPOffset)
+            .addReg(BufReg)
+            .cloneMemRefs(MI);
+
+  // Setup: BCLalways to mainMBB, which reads LR (MFLR) to get the saved IP.
+  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
+  MIB.addRegMask(TRI->getNoPreservedMask());
+
+  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
+
+  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
+          .addMBB(mainMBB);
+  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
+
+  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
+  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
+
+  // mainMBB:
+  //  mainDstReg = 0
+  MIB =
+      BuildMI(mainMBB, DL,
+              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+
+  // Store IP
+  if (Subtarget.isPPC64()) {
+    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
+            .addReg(LabelReg)
+            .addImm(LabelOffset)
+            .addReg(BufReg);
+  } else {
+    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
+            .addReg(LabelReg)
+            .addImm(LabelOffset)
+            .addReg(BufReg);
+  }
+  MIB.cloneMemRefs(MI);
+
+  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
+  mainMBB->addSuccessor(sinkMBB);
+
+  // sinkMBB:
+  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+          TII->get(PPC::PHI), DstReg)
+    .addReg(mainDstReg).addMBB(mainMBB)
+    .addReg(restoreDstReg).addMBB(thisMBB);
+
+  MI.eraseFromParent();
+  return sinkMBB;
+}
+
 MachineBasicBlock *
 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                      MachineBasicBlock *MBB) const {
@@ -13930,6 +14085,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
     return emitEHSjLjSetJmp(MI, BB);
+  } else if (MI.getOpcode() == PPC::SetJmp32 ||
+             MI.getOpcode() == PPC::SetJmp64) {
+    return emitSetJmp(MI, BB);
   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
     return emitEHSjLjLongJmp(MI, BB);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index cfcc6b5f03edc..b084bed075b73 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -386,6 +386,8 @@ namespace llvm {
 
     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;
+    MachineBasicBlock *emitSetJmp(MachineInstr &MI,
+                                  MachineBasicBlock *MBB) const;
 
     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                          MachineBasicBlock *MBB) const;
@@ -858,6 +860,7 @@ namespace llvm {
                           const CallBase *CB) const;
 
     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerSETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 2b62654b08986..c4540c132c153 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -506,11 +506,16 @@ def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$RT), (ins),
 // While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp
 // is not.
 let hasSideEffects = 1 in {
-  let Defs = [CTR8] in
+  let Defs = [CTR8] in {
   def EH_SjLj_SetJmp64  : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
                             "#EH_SJLJ_SETJMP64",
                             [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
                           Requires<[IsPPC64]>;
+  def SetJmp64           : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
+                            "#SETJMP64",
+                            [(set i32:$dst, (PPCsetjmp addr:$buf))]>,
+                          Requires<[IsPPC64]>;
+  }
 }
 
 let hasSideEffects = 1, isBarrier = 1 in {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 3271e4d279f56..ba07e318f6e8e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -590,6 +590,10 @@ def PPCeh_sjlj_setjmp  : SDNode<"PPCISD::EH_SJLJ_SETJMP",
                                 SDTypeProfile<1, 1, [SDTCisInt<0>,
                                                      SDTCisPtrTy<1>]>,
                                 [SDNPHasChain, SDNPSideEffect]>;
+def PPCsetjmp          : SDNode<"PPCISD::SETJMP",
+                                SDTypeProfile<1, 1, [SDTCisInt<0>,
+                                                     SDTCisPtrTy<1>]>,
+                                [SDNPHasChain, SDNPSideEffect]>;
 
 // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
 def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
@@ -1895,11 +1899,16 @@ def TAILBA   : IForm<18, 0, 0, (outs), (ins abscalltarget:$LI),
 // While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp
 // is not.
 let hasSideEffects = 1 in {
-  let Defs = [CTR] in
+  let Defs = [CTR] in {
   def EH_SjLj_SetJmp32  : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
                             "#EH_SJLJ_SETJMP32",
                             [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
                           Requires<[IsPPC32]>;
+  def SetJmp32           : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
+                            "#SETJMP32",
+                            [(set i32:$dst, (PPCsetjmp addr:$buf))]>,
+                          Requires<[IsPPC32]>;
+  }
 }
 
 let hasSideEffects = 1, isBarrier = 1 in {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 84d66f88a812d..0a799d53c0619 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -808,6 +808,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   // We're not using SJLJ for exception handling, but they're implemented
   // solely to support use of __builtin_setjmp / __builtin_longjmp.
   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+  setOperationAction(ISD::SETJMP, MVT::i32, Custom);
   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
 
   // We want to use MVC in preference to even a single load/store pair.
@@ -1181,6 +1182,14 @@ SystemZTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
   return SinkMBB;
 }
 
+MachineBasicBlock *
+SystemZTargetLowering::emitSetJmp(MachineInstr &MI,
+                                  MachineBasicBlock *MBB) const {
+  // The SystemZ expansion of EH_SjLj_SetJmp already stores FP, SP, and IP
+  // into the buffer itself, so SetJmp simply shares that implementation.
+  return emitEHSjLjSetJmp(MI, MBB);
+}
+
 MachineBasicBlock *
 SystemZTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                          MachineBasicBlock *MBB) const {
@@ -7298,6 +7307,7 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
   case ISD::READCYCLECOUNTER:
     return lowerREADCYCLECOUNTER(Op, DAG);
   case ISD::EH_SJLJ_SETJMP:
+  case ISD::SETJMP:
   case ISD::EH_SJLJ_LONGJMP:
     // These operations are legal on our platform, but we cannot actually
     // set the operation action to Legal as common code would treat this
@@ -11192,6 +11202,8 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
     return emitProbedAlloca(MI, MBB);
   case SystemZ::EH_SjLj_SetJmp:
     return emitEHSjLjSetJmp(MI, MBB);
+  case SystemZ::SetJmp:
+    return emitSetJmp(MI, MBB);
   case SystemZ::EH_SjLj_LongJmp:
     return emitEHSjLjLongJmp(MI, MBB);
 
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index bb3eeba6446d2..a6a7f5dfd5a8b 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -115,6 +115,8 @@ class SystemZTargetLowering : public TargetLowering {
   }
   MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const;
+  MachineBasicBlock *emitSetJmp(MachineInstr &MI,
+                                MachineBasicBlock *MBB) const;
 
   MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 35a923d070e3e..6755578236b46 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1920,8 +1920,10 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in {
 //--------------------------------------------------------------------------
 let isBarrier = 1,  hasNoSchedulingInfo = 1 in {
   let hasSideEffects = 1, usesCustomInserter = 1 in {
-    def EH_SjLj_SetJmp : Pseudo<(outs GR32:$dst), (ins ADDR64:$R2), 
+    def EH_SjLj_SetJmp : Pseudo<(outs GR32:$dst), (ins ADDR64:$R2),
                         [(set GR32:$dst, (z_eh_sjlj_setjmp ADDR64:$R2))]>;
+    def SetJmp : Pseudo<(outs GR32:$dst), (ins ADDR64:$R2),
+                        [(set GR32:$dst, (z_setjmp ADDR64:$R2))]>;
     let isTerminator = 1 in {
       def EH_SjLj_LongJmp : Pseudo<(outs), (ins ADDR64:$R2), 
                            [(z_eh_sjlj_longjmp  ADDR64:$R2)]>;
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 2a5b0435c1565..36597fb5ecafd 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -414,6 +414,8 @@ def z_tdc               : SDNode<"SystemZISD::TDC", SDT_ZTest>;
 
 def z_eh_sjlj_setjmp    : SDNode<"ISD::EH_SJLJ_SETJMP", SDT_ZSetJmp,
                                  [SDNPHasChain, SDNPSideEffect]>;
+def z_setjmp            : SDNode<"ISD::SETJMP", SDT_ZSetJmp,
+                                 [SDNPHasChain, SDNPSideEffect]>;
 def z_eh_sjlj_longjmp   : SDNode<"ISD::EH_SJLJ_LONGJMP", SDT_ZLongJmp,
                                  [SDNPHasChain, SDNPSideEffect]>;
 
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 141196c332074..5958d2cec5246 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -298,6 +298,7 @@ void VETargetLowering::initSPUActions() {
   /// SJLJ instructions {
   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+  setOperationAction(ISD::SETJMP, MVT::i32, Custom);
   setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
   /// } SJLJ instructions
 
@@ -1663,6 +1664,12 @@ SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                      Op.getOperand(1));
 }
 
+SDValue VETargetLowering::lowerSETJMP(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  return DAG.getNode(VEISD::SETJMP, DL, DAG.getVTList(MVT::i32, MVT::Other),
+                     Op.getOperand(0), Op.getOperand(1));
+}
+
 SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
                                                       SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -1833,6 +1840,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return lowerEH_SJLJ_LONGJMP(Op, DAG);
   case ISD::EH_SJLJ_SETJMP:
     return lowerEH_SJLJ_SETJMP(Op, DAG);
+  case ISD::SETJMP:
+    return lowerSETJMP(Op, DAG);
   case ISD::EH_SJLJ_SETUP_DISPATCH:
     return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
   case ISD::FRAMEADDR:
@@ -2246,6 +2255,140 @@ VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
   return SinkMBB;
 }
 
+MachineBasicBlock *
+VETargetLowering::emitSetJmp(MachineInstr &MI,
+                             MachineBasicBlock *MBB) const {
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction *MF = MBB->getParent();
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  // Expand the SetJmp pseudo; MainMBB/SinkMBB are inserted right after MBB (I).
+  const BasicBlock *BB = MBB->getBasicBlock();
+  MachineFunction::iterator I = ++MBB->getIterator();
+
+  // Memory operands of MI, reused on every buffer access emitted below.
+  SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
+  Register BufReg = MI.getOperand(1).getReg();
+
+  Register DstReg;
+  // Operand 0 is the i32 result; SinkMBB defines it with a PHI of both paths.
+  DstReg = MI.getOperand(0).getReg();
+  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
+  (void)TRI;
+  Register MainDestReg = MRI.createVirtualRegister(RC);
+  Register RestoreDestReg = MRI.createVirtualRegister(RC);
+
+  // Buffer layout:
+  //   buf[0] = Frame Pointer (SX9, offset 0), stored only if hasFP()
+  //   buf[1] = IP (offset 8), taken from LabelReg
+  //   buf[2] = Stack Pointer (SX11, offset 16)
+  //   buf[3] = Base Pointer (SX17, offset 24), stored only if hasBP()
+
+  MachineBasicBlock *ThisMBB = MBB;
+  MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
+  MF->insert(I, MainMBB);
+  MF->insert(I, SinkMBB);
+  MF->push_back(RestoreMBB);
+  RestoreMBB->setMachineBlockAddressTaken();
+
+  // Transfer the remainder of BB and its successor edges to SinkMBB.
+  SinkMBB->splice(SinkMBB->begin(), MBB,
+                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // ThisMBB: LabelReg comes from prepareMBB(), which is handed RestoreMBB.
+  Register LabelReg =
+      prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);
+
+  // Store FP (SX9) to buf[0]; buf[0] is left untouched when there is no FP.
+  const VEFrameLowering *TFI = Subtarget->getFrameLowering();
+  if (TFI->hasFP(*MF)) {
+    MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
+    MIB.addReg(BufReg);
+    MIB.addImm(0);
+    MIB.addImm(0);
+    MIB.addReg(VE::SX9);
+    MIB.setMemRefs(MMOs);
+  }
+
+  // Store SP (SX11) to buf[2].
+  {
+    MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
+    MIB.addReg(BufReg);
+    MIB.addImm(0);
+    MIB.addImm(16);
+    MIB.addReg(VE::SX11);
+    MIB.setMemRefs(MMOs);
+  }
+
+  // Store BP in buf[3] iff this function is using BP.
+  if (TFI->hasBP(*MF)) {
+    MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
+    MIB.addReg(BufReg);
+    MIB.addImm(0);
+    MIB.addImm(24);
+    MIB.addReg(VE::SX17);
+    MIB.setMemRefs(MMOs);
+  }
+
+  // Store IP in buf[1]. This is the last use of buf in ThisMBB.
+  MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
+  MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
+  MIB.addImm(0);
+  MIB.addImm(8);
+  MIB.addReg(LabelReg, getKillRegState(true));
+  MIB.setMemRefs(MMOs);
+
+  // Insert setup: names RestoreMBB and carries the no-preserved reg mask.
+  MIB =
+      BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
+
+  const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+  MIB.addRegMask(RegInfo->getNoPreservedMask());
+  ThisMBB->addSuccessor(MainMBB);
+  ThisMBB->addSuccessor(RestoreMBB);
+
+  // MainMBB: direct path; LEAzii with immediates (0, 0, 0) sets the result.
+  BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
+      .addImm(0)
+      .addImm(0)
+      .addImm(0);
+  MainMBB->addSuccessor(SinkMBB);
+
+  // SinkMBB: DstReg is a PHI of the MainMBB and RestoreMBB values.
+  BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
+      .addReg(MainDestReg)
+      .addMBB(MainMBB)
+      .addReg(RestoreDestReg)
+      .addMBB(RestoreMBB);
+
+  // RestoreMBB:
+  // Restore BP from buf[3] iff this function is using BP.  The address of
+  // buf is in SX10.
+  // FIXME: Better to not use SX10 here
+  if (TFI->hasBP(*MF)) {
+    MachineInstrBuilder MIB =
+        BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
+    MIB.addReg(VE::SX10);
+    MIB.addImm(0);
+    MIB.addImm(24);
+    MIB.setMemRefs(MMOs);
+  }
+  BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
+      .addImm(0)
+      .addImm(0)
+      .addImm(1);
+  BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
+  RestoreMBB->addSuccessor(SinkMBB);
+
+  MI.eraseFromParent();
+  return SinkMBB;
+}
+
 MachineBasicBlock *
 VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
@@ -2626,6 +2769,8 @@ VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     return emitEHSjLjLongJmp(MI, BB);
   case VE::EH_SjLj_SetJmp:
     return emitEHSjLjSetJmp(MI, BB);
+  case VE::SetJmp:
+    return emitSetJmp(MI, BB);
   case VE::EH_SjLj_Setup_Dispatch:
     return emitSjLjDispatchBlock(MI, BB);
   }
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 487804194757e..d3bcc0bf46a6c 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -190,6 +190,7 @@ class VETargetLowering : public TargetLowering {
   SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSETJMP(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
@@ -219,6 +220,8 @@ class VETargetLowering : public TargetLowering {
                                        MachineBasicBlock *MBB) const;
   MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const;
+  MachineBasicBlock *emitSetJmp(MachineInstr &MI,
+                                MachineBasicBlock *MBB) const;
   MachineBasicBlock *emitSjLjDispatchBlock(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;
 
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 9869f95ae5661..fdffa9a434201 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -465,6 +465,10 @@ def VEeh_sjlj_setjmp: SDNode<"VEISD::EH_SJLJ_SETJMP",
                              SDTypeProfile<1, 1, [SDTCisInt<0>,
                                                   SDTCisPtrTy<1>]>,
                              [SDNPHasChain, SDNPSideEffect]>;
+def VEsetjmp: SDNode<"VEISD::SETJMP",
+                     SDTypeProfile<1, 1, [SDTCisInt<0>,
+                                          SDTCisPtrTy<1>]>,
+                     [SDNPHasChain, SDNPSideEffect]>;
 def VEeh_sjlj_longjmp: SDNode<"VEISD::EH_SJLJ_LONGJMP",
                               SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
                               [SDNPHasChain, SDNPSideEffect]>;
@@ -1917,6 +1921,9 @@ let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
   def EH_SjLj_SetJmp  : Pseudo<(outs I32:$dst), (ins I64:$buf),
                                "# EH_SJLJ_SETJMP",
                                [(set I32:$dst, (VEeh_sjlj_setjmp I64:$buf))]>;
+  def SetJmp           : Pseudo<(outs I32:$dst), (ins I64:$buf),
+                               "# SETJMP",
+                               [(set I32:$dst, (VEsetjmp I64:$buf))]>;
 
   def EH_SjLj_Setup_Dispatch : Pseudo<(outs), (ins), "# EH_SJLJ_SETUP_DISPATCH",
                                       [(VEeh_sjlj_setup_dispatch)]>;
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index be95168f2de00..bbef3ed46c213 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -3035,6 +3035,7 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
       Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
       Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
       Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
+      Parent->getOpcode() != X86ISD::SETJMP && // setjmp
       Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
     unsigned AddrSpace =
       cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f0e3bd3cebd66..137f8da87deec 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -510,6 +510,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
   // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+  setOperationAction(ISD::SETJMP, MVT::i32, Custom);
   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
   setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
 
@@ -28913,6 +28914,18 @@ SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                      Op.getOperand(0), Op.getOperand(1));
 }
 
+SDValue X86TargetLowering::lowerSETJMP(SDValue Op,
+                                       SelectionDAG &DAG) const {
+  // 32-bit only: request the global base register up front (result unused);
+  // emitSetJmp reads it again when forming the resume block's address.
+  if (!Subtarget.is64Bit())
+    (void)Subtarget.getInstrInfo()->getGlobalBaseReg(&DAG.getMachineFunction());
+
+  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); // value + chain
+  return DAG.getNode(X86ISD::SETJMP, SDLoc(Op), VTs, Op.getOperand(0),
+                     Op.getOperand(1));
+}
+
 SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                 SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -34209,6 +34222,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
   case ISD::EH_RETURN:          return LowerEH_RETURN(Op, DAG);
   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
+  case ISD::SETJMP:             return lowerSETJMP(Op, DAG);
   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
   case ISD::EH_SJLJ_SETUP_DISPATCH:
     return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
@@ -37578,6 +37592,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
     MIB.addMBB(restoreMBB);
   MIB.setMemRefs(MMOs);
 
+  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+
   if (MF->getFunction().getParent()->getModuleFlag("cf-protection-return")) {
     emitSetJmpShadowStackFix(MI, thisMBB);
   }
@@ -37586,7 +37602,6 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
   MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::EH_SjLj_Setup))
           .addMBB(restoreMBB);
 
-  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   MIB.addRegMask(RegInfo->getNoPreservedMask());
   thisMBB->addSuccessor(mainMBB);
   thisMBB->addSuccessor(restoreMBB);
@@ -37623,6 +37638,165 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
   return sinkMBB;
 }
 
+// Expand the SetJmp32/SetJmp64 pseudo (@llvm.setjmp). Like emitEHSjLjSetJmp,
+// but the backend itself stores FP to buf[0] and SP to buf[2] (the frontend
+// emits no @llvm.frameaddress / @llvm.stacksave stores). Returns sinkMBB.
+MachineBasicBlock *
+X86TargetLowering::emitSetJmp(MachineInstr &MI,
+                              MachineBasicBlock *MBB) const {
+  const MIMetadata MIMD(MI);
+  MachineFunction *MF = MBB->getParent();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  const BasicBlock *BB = MBB->getBasicBlock();
+  MachineFunction::iterator I = ++MBB->getIterator();
+
+  SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
+
+  // Operand 0 is the i32 result; buf's address operands start at operand 1.
+  const unsigned MemOpndSlot = 1;
+  Register DstReg = MI.getOperand(0).getReg();
+  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
+  (void)TRI;
+  Register mainDstReg = MRI.createVirtualRegister(RC);
+  Register restoreDstReg = MRI.createVirtualRegister(RC);
+
+  MVT PVT = getPointerTy(MF->getDataLayout());
+  assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
+
+  MachineBasicBlock *thisMBB = MBB;
+  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
+  MF->insert(I, mainMBB);
+  MF->insert(I, sinkMBB);
+  MF->push_back(restoreMBB);
+  restoreMBB->setMachineBlockAddressTaken();
+
+  MachineInstrBuilder MIB;
+
+  sinkMBB->splice(sinkMBB->begin(), MBB,
+                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // thisMBB: materialize the resume address (restoreMBB) and fill in buf.
+  unsigned PtrStoreOpc = 0;
+  Register LabelReg;
+  const int64_t LabelOffset = 1 * PVT.getStoreSize();
+  bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
+                     !isPositionIndependent();
+
+  // Prepare IP either in reg or imm.
+  if (!UseImmLabel) {
+    PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
+    const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+    LabelReg = MRI.createVirtualRegister(PtrRC);
+    if (Subtarget.is64Bit()) {
+      MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA64r), LabelReg)
+              .addReg(X86::RIP)
+              .addImm(0)
+              .addReg(0)
+              .addMBB(restoreMBB)
+              .addReg(0);
+    } else {
+      const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
+      MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA32r), LabelReg)
+              .addReg(XII->getGlobalBaseReg(MF))
+              .addImm(0)
+              .addReg(0)
+              .addMBB(restoreMBB, Subtarget.classifyBlockAddressReference())
+              .addReg(0);
+    }
+  } else
+    PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
+
+  // buf's address operands are copied into every store below. Drop their kill
+  // flags first so no earlier copy marks a register dead before a later use.
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+    if (MachineOperand &MO = MI.getOperand(MemOpndSlot + i); MO.isReg())
+      MO.setIsKill(false);
+
+  // Store IP to buf[1].
+  MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrStoreOpc));
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+    if (i == X86::AddrDisp)
+      MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset);
+    else
+      MIB.add(MI.getOperand(MemOpndSlot + i));
+  }
+  if (!UseImmLabel)
+    MIB.addReg(LabelReg);
+  else
+    MIB.addMBB(restoreMBB);
+  MIB.setMemRefs(MMOs);
+
+  // Store FP to buf[0] (only if the function has one) and SP to buf[2].
+  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  unsigned RegStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
+
+  if (Subtarget.getFrameLowering()->hasFP(*MF)) {
+    MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(RegStoreOpc));
+    for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+      MIB.add(MI.getOperand(MemOpndSlot + i));
+    MIB.addReg(RegInfo->getFrameRegister(*MF));
+    MIB.setMemRefs(MMOs);
+  }
+
+  const int64_t SPOffset = 2 * PVT.getStoreSize();
+  MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(RegStoreOpc));
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+    if (i == X86::AddrDisp)
+      MIB.addDisp(MI.getOperand(MemOpndSlot + i), SPOffset);
+    else
+      MIB.add(MI.getOperand(MemOpndSlot + i));
+  }
+  MIB.addReg(RegInfo->getStackRegister());
+  MIB.setMemRefs(MMOs);
+
+  if (MF->getFunction().getParent()->getModuleFlag("cf-protection-return"))
+    emitSetJmpShadowStackFix(MI, thisMBB);
+
+  // Setup: EH_SjLj_Setup names restoreMBB and carries the no-preserved mask.
+  MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::EH_SjLj_Setup))
+          .addMBB(restoreMBB);
+  MIB.addRegMask(RegInfo->getNoPreservedMask());
+  thisMBB->addSuccessor(mainMBB);
+  thisMBB->addSuccessor(restoreMBB);
+
+  // mainMBB: direct path, result 0.
+  BuildMI(mainMBB, MIMD, TII->get(X86::MOV32r0), mainDstReg);
+  mainMBB->addSuccessor(sinkMBB);
+
+  // sinkMBB: DstReg is a PHI of the mainMBB and restoreMBB results.
+  BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg)
+      .addReg(mainDstReg)
+      .addMBB(mainMBB)
+      .addReg(restoreDstReg)
+      .addMBB(restoreMBB);
+
+  // restoreMBB: resume path; reload the base pointer if used, result 1.
+  if (RegInfo->hasBasePointer(*MF)) {
+    const bool Uses64BitFramePtr = Subtarget.isTarget64BitLP64();
+    X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
+    X86FI->setRestoreBasePointer(MF);
+    Register FramePtr = RegInfo->getFrameRegister(*MF);
+    Register BasePtr = RegInfo->getBaseRegister();
+    unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
+    addRegOffset(BuildMI(restoreMBB, MIMD, TII->get(Opm), BasePtr),
+                 FramePtr, true, X86FI->getRestoreBasePointerOffset())
+      .setMIFlag(MachineInstr::FrameSetup);
+  }
+  BuildMI(restoreMBB, MIMD, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
+  BuildMI(restoreMBB, MIMD, TII->get(X86::JMP_1)).addMBB(sinkMBB);
+  restoreMBB->addSuccessor(sinkMBB);
+
+  MI.eraseFromParent();
+  return sinkMBB;
+}
+
 /// Fix the shadow stack using the previously saved SSP pointer.
 /// \sa emitSetJmpShadowStackFix
 /// \param [in] MI The temporary Machine Instruction for the builtin.
@@ -38408,6 +38582,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   case X86::EH_SjLj_SetJmp64:
     return emitEHSjLjSetJmp(MI, BB);
 
+  case X86::SetJmp32:
+  case X86::SetJmp64:
+    return emitSetJmp(MI, BB);
+
   case X86::EH_SjLj_LongJmp32:
   case X86::EH_SjLj_LongJmp64:
     return emitEHSjLjLongJmp(MI, BB);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 5c7c54cacd239..1274089fea7da 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -823,6 +823,7 @@ namespace llvm {
                                             ISD::ArgFlagsTy Flags) const;
     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerSETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
@@ -928,6 +929,8 @@ namespace llvm {
 
     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;
+    MachineBasicBlock *emitSetJmp(MachineInstr &MI,
+                                  MachineBasicBlock *MBB) const;
 
     void emitSetJmpShadowStackFix(MachineInstr &MI,
                                   MachineBasicBlock *MBB) const;
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index bc05dae7351bb..847d141d1ce90 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -214,6 +214,14 @@ let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
                             "#EH_SJLJ_SETJMP64",
                             [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
                           Requires<[In64BitMode]>;
+  def SetJmp32  : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
+                    "#SETJMP32",
+                    [(set GR32:$dst, (X86setjmp addr:$buf))]>,
+                  Requires<[Not64BitMode]>;
+  def SetJmp64  : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf),
+                    "#SETJMP64",
+                    [(set GR32:$dst, (X86setjmp addr:$buf))]>,
+                  Requires<[In64BitMode]>;
   let isTerminator = 1 in {
   def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),
                             "#EH_SJLJ_LONGJMP32",
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index 3cd05ab0351bd..fe59fbfc6201b 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -352,6 +352,10 @@ def X86eh_sjlj_setjmp  : SDNode<"X86ISD::EH_SJLJ_SETJMP",
                                 SDTypeProfile<1, 1, [SDTCisInt<0>,
                                                      SDTCisPtrTy<1>]>,
                                 [SDNPHasChain, SDNPSideEffect]>;
+def X86setjmp          : SDNode<"X86ISD::SETJMP",
+                                SDTypeProfile<1, 1, [SDTCisInt<0>,
+                                                     SDTCisPtrTy<1>]>,
+                                [SDNPHasChain, SDNPSideEffect]>;
 
 // SjLj exception handling longjmp.
 def X86eh_sjlj_longjmp : SDNode<"X86ISD::EH_SJLJ_LONGJMP",
diff --git a/llvm/test/CodeGen/ARM/setjmp.ll b/llvm/test/CodeGen/ARM/setjmp.ll
new file mode 100644
index 0000000000000..e322bbe4d5d12
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/setjmp.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck --check-prefix=ARM %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck --check-prefix=THUMB2 %s
+
+; Verify that @llvm.setjmp produces the same FP/SP stores as the old pattern
+; of @llvm.frameaddress + @llvm.stacksave + stores + @llvm.eh.sjlj.setjmp.
+
+@buf = internal global [5 x ptr] zeroinitializer ; jump buffer for both functions
+
+; --- Old pattern (frameaddress + stacksave + stores + eh.sjlj.setjmp) ---
+
+declare ptr @llvm.frameaddress(i32) nounwind readnone
+declare ptr @llvm.stacksave() nounwind
+declare i32 @llvm.eh.sjlj.setjmp(ptr) nounwind
+
+define i32 @old_setjmp() nounwind "frame-pointer"="all" {
+  %fp = call ptr @llvm.frameaddress(i32 0)
+  store ptr %fp, ptr @buf, align 16
+  %sp = call ptr @llvm.stacksave()
+  store ptr %sp, ptr getelementptr inbounds ([5 x ptr], ptr @buf, i64 0, i64 2), align 16
+  %r = call i32 @llvm.eh.sjlj.setjmp(ptr @buf)
+  ret i32 %r
+}
+
+; --- New pattern (@llvm.setjmp) ---
+
+declare i32 @llvm.setjmp(ptr) nounwind
+
+define i32 @new_setjmp() nounwind "frame-pointer"="all" {
+  %r = call i32 @llvm.setjmp(ptr @buf)
+  ret i32 %r
+}
+
+; Both functions should store FP (r7) to buf[0] and SP to buf[2].
+
+; ARM-LABEL: _old_setjmp:
+; ARM:       str r7, [r0]
+; ARM:       str sp, [r0, #8]
+; ARM-LABEL: _new_setjmp:
+; ARM:       str r7, [r0]
+; ARM:       str sp, [r0, #8]
+
+; THUMB2-LABEL: _old_setjmp:
+; THUMB2:       str r7, [r0]
+; THUMB2:       str.w sp, [r0, #8]
+; THUMB2-LABEL: _new_setjmp:
+; THUMB2:       str r7, [r0]
+; THUMB2:       str.w sp, [r0, #8]
diff --git a/llvm/test/CodeGen/PowerPC/setjmp.ll b/llvm/test/CodeGen/PowerPC/setjmp.ll
new file mode 100644
index 0000000000000..f0b86a790c488
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/setjmp.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -verify-machineinstrs | FileCheck %s
+
+; Verify that @llvm.setjmp produces the same FP/SP stores as the old pattern
+; of @llvm.frameaddress + @llvm.stacksave + stores + @llvm.eh.sjlj.setjmp.
+
+@buf = internal global [5 x ptr] zeroinitializer, align 8 ; jump buffer for both functions
+
+; --- Old pattern (frameaddress + stacksave + stores + eh.sjlj.setjmp) ---
+
+declare ptr @llvm.frameaddress(i32) nounwind readnone
+declare ptr @llvm.stacksave() nounwind
+declare i32 @llvm.eh.sjlj.setjmp(ptr) nounwind
+
+define i32 @old_setjmp() nounwind "frame-pointer"="all" {
+  %fp = call ptr @llvm.frameaddress(i32 0)
+  store ptr %fp, ptr @buf, align 8
+  %sp = call ptr @llvm.stacksave()
+  store ptr %sp, ptr getelementptr inbounds (ptr, ptr @buf, i64 2), align 8
+  %r = call i32 @llvm.eh.sjlj.setjmp(ptr @buf)
+  ret i32 %r
+}
+
+; --- New pattern (@llvm.setjmp) ---
+
+declare i32 @llvm.setjmp(ptr) nounwind
+
+define i32 @new_setjmp() nounwind "frame-pointer"="all" {
+  %r = call i32 @llvm.setjmp(ptr @buf)
+  ret i32 %r
+}
+
+; Both functions should store FP (r31) to buf[0] and SP (r1) to buf[2] (offset 16).
+
+; CHECK-LABEL: old_setjmp:
+; CHECK:       std 31, buf@toc@l(
+; CHECK:       std 1, 16(
+; CHECK-LABEL: new_setjmp:
+; CHECK:       std 31, 0(
+; CHECK:       std 1, 16(
diff --git a/llvm/test/CodeGen/SystemZ/setjmp.ll b/llvm/test/CodeGen/SystemZ/setjmp.ll
new file mode 100644
index 0000000000000..01c78e58aff2d
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/setjmp.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Verify that @llvm.setjmp produces the same IP/SP stores as the old pattern
+; of @llvm.frameaddress + @llvm.stacksave + stores + @llvm.eh.sjlj.setjmp.
+; SystemZ's eh.sjlj.setjmp already stores IP and SP internally, so
+; the old and new patterns should produce identical output.
+
+@buf = global [20 x ptr] zeroinitializer, align 8 ; jump buffer for both functions
+
+; --- Old pattern (eh.sjlj.setjmp, which already stores IP+SP on SystemZ) ---
+
+declare i32 @llvm.eh.sjlj.setjmp(ptr) nounwind
+
+define void @old_setjmp() nounwind {
+  %r = tail call i32 @llvm.eh.sjlj.setjmp(ptr nonnull @buf)
+  ret void
+}
+
+; --- New pattern (@llvm.setjmp) ---
+
+declare i32 @llvm.setjmp(ptr) nounwind
+
+define void @new_setjmp() nounwind {
+  %r = tail call i32 @llvm.setjmp(ptr nonnull @buf)
+  ret void
+}
+
+; Both should store IP to buf[1] (offset 8) and SP to buf[3] (offset 24).
+
+; CHECK-LABEL: old_setjmp:
+; CHECK:       stg %r0, 8(%r1)
+; CHECK:       stg %r15, 24(%r1)
+; CHECK-LABEL: new_setjmp:
+; CHECK:       stg %r0, 8(%r1)
+; CHECK:       stg %r15, 24(%r1)
diff --git a/llvm/test/CodeGen/VE/Scalar/setjmp.ll b/llvm/test/CodeGen/VE/Scalar/setjmp.ll
new file mode 100644
index 0000000000000..24747bacbbfea
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Scalar/setjmp.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=ve | FileCheck %s
+
+; Verify that @llvm.setjmp produces the same FP/SP stores as the old pattern
+; of @llvm.frameaddress + @llvm.stacksave + stores + @llvm.eh.sjlj.setjmp.
+
+@buf = common global [1 x [25 x i64]] zeroinitializer, align 8 ; jump buffer for both functions
+
+; --- Old pattern (frameaddress + stacksave + stores + eh.sjlj.setjmp) ---
+
+declare ptr @llvm.frameaddress(i32) nounwind readnone
+declare ptr @llvm.stacksave() nounwind
+declare i32 @llvm.eh.sjlj.setjmp(ptr) nounwind
+
+define i32 @old_setjmp() nounwind "frame-pointer"="all" {
+  %fp = call ptr @llvm.frameaddress(i32 0)
+  store ptr %fp, ptr @buf, align 8
+  %sp = call ptr @llvm.stacksave()
+  store ptr %sp, ptr getelementptr inbounds (ptr, ptr @buf, i64 2), align 8
+  %r = call i32 @llvm.eh.sjlj.setjmp(ptr @buf)
+  ret i32 %r
+}
+
+; --- New pattern (@llvm.setjmp) ---
+
+declare i32 @llvm.setjmp(ptr) nounwind
+
+define i32 @new_setjmp() nounwind "frame-pointer"="all" {
+  %r = call i32 @llvm.setjmp(ptr @buf)
+  ret i32 %r
+}
+
+; Both functions should store FP (s9) to buf[0] and SP (s11) to buf[2].
+
+; CHECK-LABEL: old_setjmp:
+; CHECK:       st %s9, (, %s0)
+; CHECK:       st %s11, 16(, %s0)
+; CHECK-LABEL: new_setjmp:
+; CHECK:       st %s9, (, %s0)
+; CHECK:       st %s11, 16(, %s0)
diff --git a/llvm/test/CodeGen/X86/setjmp.ll b/llvm/test/CodeGen/X86/setjmp.ll
new file mode 100644
index 0000000000000..1e4b195949576
--- /dev/null
+++ b/llvm/test/CodeGen/X86/setjmp.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck --check-prefix=X86 %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck --check-prefix=X64 %s
+; RUN: llc < %s -mtriple=x86_64-windows-gnu | FileCheck --check-prefix=WIN64 %s
+
+; Verify that @llvm.setjmp produces the same output as the old pattern of
+; @llvm.frameaddress + @llvm.stacksave + stores + @llvm.eh.sjlj.setjmp.
+
+@buf = internal global [5 x ptr] zeroinitializer ; jump buffer for both functions
+
+; --- Old pattern (frameaddress + stacksave + stores + eh.sjlj.setjmp) ---
+
+declare ptr @llvm.frameaddress(i32) nounwind readnone
+declare ptr @llvm.stacksave() nounwind
+declare i32 @llvm.eh.sjlj.setjmp(ptr) nounwind
+
+define i32 @old_setjmp() nounwind "frame-pointer"="all" {
+  %fp = tail call ptr @llvm.frameaddress(i32 0)
+  store ptr %fp, ptr @buf, align 16
+  %sp = tail call ptr @llvm.stacksave()
+  store ptr %sp, ptr getelementptr inbounds ([5 x ptr], ptr @buf, i64 0, i64 2), align 16
+  %r = tail call i32 @llvm.eh.sjlj.setjmp(ptr @buf)
+  ret i32 %r
+}
+
+; --- New pattern (@llvm.setjmp) ---
+
+declare i32 @llvm.setjmp(ptr) nounwind
+
+define i32 @new_setjmp() nounwind "frame-pointer"="all" {
+  %r = tail call i32 @llvm.setjmp(ptr @buf)
+  ret i32 %r
+}
+
+; Both functions should store FP to buf[0], SP to buf[2], IP to buf[1].
+
+; X86-LABEL: old_setjmp:
+; X86:       movl %ebp, buf
+; X86:       movl %esp, buf+8
+; X86-LABEL: new_setjmp:
+; X86:       movl %ebp, buf
+; X86:       movl %esp, buf+8
+
+; X64-LABEL: old_setjmp:
+; X64:       movq %rbp, buf(%rip)
+; X64:       movq %rsp, buf+16(%rip)
+; X64-LABEL: new_setjmp:
+; X64:       movq %rbp, buf(%rip)
+; X64:       movq %rsp, buf+16(%rip)
+
+; On WIN64, the old pattern stores an adjusted address from @llvm.frameaddress
+; (which is wrong on WindowsCFI targets). The new @llvm.setjmp stores %rbp
+; directly, which is the correct fix.
+; WIN64-LABEL: new_setjmp:
+; WIN64:       movq %rbp, buf(%rip)
+; WIN64:       movq %rsp, buf+16(%rip)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 135d1e4007d49..ad5212fcfda45 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -230,11 +230,22 @@ class ROCDL_SpecialIdRegisterOp<string mnemonic> :
 // ROCDL vector types definitions
 //===----------------------------------------------------------------------===//
 
+class ROCDL_NamedType<string name> {
+  string typeName = name;
+}
+
 class ROCDL_ConcreteVector<Type elem, int length> :
   FixedVectorOfLengthAndType<[length], [elem]>,
   BuildableType<
     "::mlir::VectorType::get({" # length # "} ,"
-      # elem.builderCall # ")">;
+      # elem.builderCall # ")">,
+  ROCDL_NamedType<"vector<" # length # "x"
+    # !tolower(!cast<string>(elem)) # ">">;
+
+class ROCDL_Scalar<Type elem> :
+  Type<elem.predicate, elem.summary>,
+  BuildableType<elem.builderCall>,
+  ROCDL_NamedType<!tolower(!cast<string>(elem))>;
 
 def ROCDL_V2I16Type : ROCDL_ConcreteVector<I16, 2>;
 def ROCDL_V2F16Type : ROCDL_ConcreteVector<F16, 2>;
@@ -925,7 +936,7 @@ def ROCDL_IglpOpt : ROCDL_ConcreteNonMemIntrOp<"iglp.opt", [], 0, [0], ["variant
 //===---------------------------------------------------------------------===//
 // Xdlops intrinsics
 
-class ROCDL_Mfma_IntrOp<string mnemonic, Type ABType, Type CDType> :
+class ROCDL_Mfma_IntrOp<string mnemonic, ROCDL_NamedType ABType, ROCDL_NamedType CDType> :
   ROCDL_IntrOp<mnemonic, [], [], [], 1, 0, 0, 0, [3, 4, 5], ["cbsz", "abid", "blgp"]>,
   Arguments<(ins
              ABType:$a,
@@ -945,19 +956,10 @@ class ROCDL_Mfma_IntrOp<string mnemonic, Type ABType, Type CDType> :
 
     Example:
     ```mlir
-    // MFMA with f32 inputs and 32-wide f32 accumulator.
-    %r0 = rocdl.mfma.f32.32x32x1f32 %a0, %b0, %c0, 0, 0, 0 :
-      (f32, f32, vector<32xf32>) -> vector<32xf32>
-
-    // MFMA with i8 inputs and 32-wide i32 accumulator.
-    %r1 = rocdl.mfma.i32.32x32x4i8 %a1, %a1, %c1, 0, 0, 0 :
-      (i32, i32, vector<32xi32>) -> vector<32xi32>
-
-    // MFMA with bf16 inputs and 32-wide f32 accumulator.
-    %r2 = rocdl.mfma.f32.32x32x2bf16 %a2, %a2, %c0, 0, 0, 0 :
-      (vector<2xi16>, vector<2xi16>, vector<32xf32>) -> vector<32xf32>
-    ```
-  }];
+    %r0 = }] # mnemonic # [{ %a0, %b0, %c0, 0, 0, 0 : (}] # ABType.typeName
+    # [{, }] # ABType.typeName # [{, }] # CDType.typeName # [{) -> }]
+    # CDType.typeName # [{
+    ```}];
 }
 
 class ROCDL_Mfma_Scale_IntrOp<string mnemonic, Type AB, Type CD> :
@@ -1038,21 +1040,21 @@ class ROCDL_Smfmac_IntrOp<string mnemonic, Type AType, Type BType, Type CDType>
 }
 
 // Available on all CDNA.
-def ROCDL_mfma_f32_32x32x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x1f32", /*Type AB=*/F32, /*Type CD=*/ROCDL_ConcreteVector<F32, 32>>;
-def ROCDL_mfma_f32_16x16x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x1f32", F32, ROCDL_ConcreteVector<F32, 16>>;
-def ROCDL_mfma_f32_4x4x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x1f32", F32, ROCDL_ConcreteVector<F32, 4>>;
-def ROCDL_mfma_f32_32x32x2f32 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x2f32", F32, ROCDL_ConcreteVector<F32, 16>>;
-def ROCDL_mfma_f32_16x16x4f32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x4f32", F32, ROCDL_ConcreteVector<F32, 4>>;
+def ROCDL_mfma_f32_32x32x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x1f32", /*ABType=*/ROCDL_Scalar<F32>, /*CDType=*/ROCDL_ConcreteVector<F32, 32>>;
+def ROCDL_mfma_f32_16x16x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x1f32", ROCDL_Scalar<F32>, ROCDL_ConcreteVector<F32, 16>>;
+def ROCDL_mfma_f32_4x4x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x1f32", ROCDL_Scalar<F32>, ROCDL_ConcreteVector<F32, 4>>;
+def ROCDL_mfma_f32_32x32x2f32 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x2f32", ROCDL_Scalar<F32>, ROCDL_ConcreteVector<F32, 16>>;
+def ROCDL_mfma_f32_16x16x4f32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x4f32", ROCDL_Scalar<F32>, ROCDL_ConcreteVector<F32, 4>>;
 def ROCDL_mfma_f32_32x32x4f16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x4f16", ROCDL_ConcreteVector<F16, 4>, ROCDL_ConcreteVector<F32, 32>>;
 def ROCDL_mfma_f32_16x16x4f16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x4f16", ROCDL_ConcreteVector<F16, 4>, ROCDL_ConcreteVector<F32, 16>>;
 def ROCDL_mfma_f32_4x4x4f16 : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x4f16", ROCDL_ConcreteVector<F16, 4>, ROCDL_ConcreteVector<F32, 4>>;
 def ROCDL_mfma_f32_32x32x8f16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x8f16", ROCDL_ConcreteVector<F16, 4>, ROCDL_ConcreteVector<F32, 16>>;
 def ROCDL_mfma_f32_16x16x16f16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x16f16", ROCDL_ConcreteVector<F16, 4>, ROCDL_ConcreteVector<F32, 4>>;
-def ROCDL_mfma_i32_32x32x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x4i8", I32, ROCDL_ConcreteVector<I32, 32>>;
-def ROCDL_mfma_i32_16x16x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x4i8", I32, ROCDL_ConcreteVector<I32, 16>>;
-def ROCDL_mfma_i32_4x4x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.4x4x4i8", I32, ROCDL_ConcreteVector<I32, 4>>;
-def ROCDL_mfma_i32_32x32x8i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x8i8", I32, ROCDL_ConcreteVector<I32, 16>>;
-def ROCDL_mfma_i32_16x16x16i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x16i8", I32, ROCDL_ConcreteVector<I32, 4>>;
+def ROCDL_mfma_i32_32x32x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x4i8", ROCDL_Scalar<I32>, ROCDL_ConcreteVector<I32, 32>>;
+def ROCDL_mfma_i32_16x16x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x4i8", ROCDL_Scalar<I32>, ROCDL_ConcreteVector<I32, 16>>;
+def ROCDL_mfma_i32_4x4x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.4x4x4i8", ROCDL_Scalar<I32>, ROCDL_ConcreteVector<I32, 4>>;
+def ROCDL_mfma_i32_32x32x8i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x8i8", ROCDL_Scalar<I32>, ROCDL_ConcreteVector<I32, 16>>;
+def ROCDL_mfma_i32_16x16x16i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x16i8", ROCDL_Scalar<I32>, ROCDL_ConcreteVector<I32, 4>>;
 def ROCDL_mfma_f32_32x32x2bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x2bf16", ROCDL_ConcreteVector<I16, 2>, ROCDL_ConcreteVector<F32, 32>>;
 def ROCDL_mfma_f32_16x16x2bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x2bf16", ROCDL_ConcreteVector<I16, 2>, ROCDL_ConcreteVector<F32, 16>>;
 def ROCDL_mfma_f32_4x4x2bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x2bf16", ROCDL_ConcreteVector<I16, 2>, ROCDL_ConcreteVector<F32, 4>>;
@@ -1066,21 +1068,21 @@ def ROCDL_mfma_f32_32x32x8bf16_1k : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x8bf16.1k",
 def ROCDL_mfma_f32_16x16x16bf16_1k : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x16bf16.1k", ROCDL_ConcreteVector<I16, 4>, ROCDL_ConcreteVector<F32, 4>>;
 // Note: in gfx94x, unlike in gfx90a, the f64 xdlops use the "blgp" argument as
 // a NEG bitfield. See IntrinsicsAMDGPU.td for more info.
-def ROCDL_mfma_f64_16x16x4f64 : ROCDL_Mfma_IntrOp<"mfma.f64.16x16x4f64", F64, ROCDL_ConcreteVector<F64, 4>>;
-def ROCDL_mfma_f64_4x4x4f64 : ROCDL_Mfma_IntrOp<"mfma.f64.4x4x4f64", F64, F64>;
+def ROCDL_mfma_f64_16x16x4f64 : ROCDL_Mfma_IntrOp<"mfma.f64.16x16x4f64", ROCDL_Scalar<F64>, ROCDL_ConcreteVector<F64, 4>>;
+def ROCDL_mfma_f64_4x4x4f64 : ROCDL_Mfma_IntrOp<"mfma.f64.4x4x4f64", ROCDL_Scalar<F64>, ROCDL_Scalar<F64>>;
 // New in gfx94x.
-def ROCDL_mfma_i32_16x16x32_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x32.i8", I64, ROCDL_ConcreteVector<I32, 4>>;
-def ROCDL_mfma_i32_32x32x16_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x16.i8", I64, ROCDL_ConcreteVector<I32, 16>>;
+def ROCDL_mfma_i32_16x16x32_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x32.i8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<I32, 4>>;
+def ROCDL_mfma_i32_32x32x16_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x16.i8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<I32, 16>>;
 def ROCDL_mfma_f32_16x16x8_xf32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x8.xf32", ROCDL_ConcreteVector<F32, 2>, ROCDL_ConcreteVector<F32, 4>>;
 def ROCDL_mfma_f32_32x32x4_xf32 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x4.xf32", ROCDL_ConcreteVector<F32, 2>, ROCDL_ConcreteVector<F32, 16>>;
-def ROCDL_mfma_f32_16x16x32_bf8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf8.bf8", I64, ROCDL_ConcreteVector<F32, 4>>;
-def ROCDL_mfma_f32_16x16x32_bf8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf8.fp8", I64, ROCDL_ConcreteVector<F32, 4>>;
-def ROCDL_mfma_f32_16x16x32_fp8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.fp8.bf8", I64, ROCDL_ConcreteVector<F32, 4>>;
-def ROCDL_mfma_f32_16x16x32_fp8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.fp8.fp8", I64, ROCDL_ConcreteVector<F32, 4>>;
-def ROCDL_mfma_f32_32x32x16_bf8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf8.bf8", I64, ROCDL_ConcreteVector<F32, 16>>;
-def ROCDL_mfma_f32_32x32x16_bf8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf8.fp8", I64, ROCDL_ConcreteVector<F32, 16>>;
-def ROCDL_mfma_f32_32x32x16_fp8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.fp8.bf8", I64, ROCDL_ConcreteVector<F32, 16>>;
-def ROCDL_mfma_f32_32x32x16_fp8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.fp8.fp8", I64, ROCDL_ConcreteVector<F32, 16>>;
+def ROCDL_mfma_f32_16x16x32_bf8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf8.bf8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 4>>;
+def ROCDL_mfma_f32_16x16x32_bf8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf8.fp8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 4>>;
+def ROCDL_mfma_f32_16x16x32_fp8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.fp8.bf8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 4>>;
+def ROCDL_mfma_f32_16x16x32_fp8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.fp8.fp8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 4>>;
+def ROCDL_mfma_f32_32x32x16_bf8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf8.bf8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 16>>;
+def ROCDL_mfma_f32_32x32x16_bf8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf8.fp8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 16>>;
+def ROCDL_mfma_f32_32x32x16_fp8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.fp8.bf8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 16>>;
+def ROCDL_mfma_f32_32x32x16_fp8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.fp8.fp8", ROCDL_Scalar<I64>, ROCDL_ConcreteVector<F32, 16>>;
 // New in gfx950.
 def ROCDL_mfma_f32_16x16x32_bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf16", ROCDL_ConcreteVector<BF16, 8>, ROCDL_ConcreteVector<F32, 4>>;
 def ROCDL_mfma_i32_16x16x64_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x64.i8", ROCDL_ConcreteVector<I32, 4>, ROCDL_ConcreteVector<I32, 4>>;



More information about the Mlir-commits mailing list