[llvm] AMDGPU: Avoid crashing on statepoint-like pseudoinstructions (PR #170657)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 5 14:45:02 PST 2025
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/170657
>From 42a317918e07e32382b8bef644f052d3fed7fc32 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 4 Dec 2025 12:42:14 +0100
Subject: [PATCH] AMDGPU: Avoid crashing on statepoint-like pseudoinstructions
At the moment the MIR tests are somewhat redundant. The waitcnt
one is needed to ensure we actually have a load, given we are
currently just emitting an error on ExternalSymbol. The asm printer
one is more redundant for the moment, since it's stressed by the IR
test. However I am planning to change the error path for the IR test,
so it will soon not be redundant.
---
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 13 +++-
.../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 11 ++++
.../SelectionDAG/StatepointLowering.cpp | 2 +
llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 10 +++
.../AMDGPU/AMDGPUResourceUsageAnalysis.cpp | 5 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 12 ++++
llvm/lib/Target/AMDGPU/SIISelLowering.h | 2 +
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 2 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 8 +++
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 +
llvm/test/CodeGen/AMDGPU/llvm.deoptimize.ll | 16 +++++
.../CodeGen/AMDGPU/statepoint-asm-printer.mir | 40 ++++++++++++
.../AMDGPU/statepoint-insert-waitcnts.mir | 64 +++++++++++++++++++
13 files changed, 184 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.deoptimize.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/statepoint-asm-printer.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/statepoint-insert-waitcnts.mir
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 2a9522452d7cd..11adc190b2a62 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2350,7 +2350,18 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
/// Returns the callee operand from the given \p MI.
virtual const MachineOperand &getCalleeOperand(const MachineInstr &MI) const {
- return MI.getOperand(0);
+ assert(MI.isCall());
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::STATEPOINT:
+ case TargetOpcode::STACKMAP:
+ case TargetOpcode::PATCHPOINT:
+ return MI.getOperand(3);
+ default:
+ return MI.getOperand(0);
+ }
+
+ llvm_unreachable("impossible call instruction");
}
/// Return the uniformity behavior of the given instruction.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index dd8f18d3b8a6a..7998da0ea06eb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -331,6 +331,17 @@ namespace llvm {
MachineBasicBlock *
TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::STATEPOINT:
+ // As an implementation detail, STATEPOINT shares the STACKMAP format at
+ // this point in the process. We diverge later.
+ case TargetOpcode::STACKMAP:
+ case TargetOpcode::PATCHPOINT:
+ return emitPatchPoint(MI, MBB);
+ default:
+ break;
+ }
+
#ifndef NDEBUG
dbgs() << "If a target marks an instruction with "
"'usesCustomInserter', it must implement "
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 46a5e44374e1c..5b8cd343557fa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -1145,6 +1145,8 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB,
bool VarArgDisallowed, bool ForceVoidReturnTy) {
StatepointLoweringInfo SI(DAG);
+ SI.CLI.CB = Call;
+
unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin();
populateCallLoweringInfo(
SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index bf9b4297bd435..99c1ab8d379d5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -406,6 +406,16 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}
+ unsigned Opc = MI->getOpcode();
+ if (LLVM_UNLIKELY(Opc == TargetOpcode::STATEPOINT ||
+ Opc == TargetOpcode::STACKMAP ||
+ Opc == TargetOpcode::PATCHPOINT)) {
+ LLVMContext &Ctx = MI->getMF()->getFunction().getContext();
+ Ctx.emitError("unhandled statepoint-like instruction");
+ OutStreamer->emitRawComment("unsupported statepoint/stackmap/patchpoint");
+ return;
+ }
+
if (isVerbose())
if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index b03d50f2d451d..4e664e084fb88 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -256,10 +256,13 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
// Pseudo used just to encode the underlying global. Is there a better
// way to track this?
+ // TODO: Some of the generic call-like pseudos do not encode the callee,
+ // so we overly conservatively treat this as an indirect call.
const MachineOperand *CalleeOp =
TII->getNamedOperand(MI, AMDGPU::OpName::callee);
- const Function *Callee = getCalleeFunction(*CalleeOp);
+ const Function *Callee =
+ CalleeOp ? getCalleeFunction(*CalleeOp) : nullptr;
auto isSameFunction = [](const MachineFunction &MF, const Function *F) {
return F == &MF.getFunction();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ff67fd63ea75e..89b235e384e2f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -275,6 +275,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v16i64, MVT::v16i32, Expand);
setOperationAction(ISD::GlobalAddress, {MVT::i32, MVT::i64}, Custom);
+ setOperationAction(ISD::ExternalSymbol, {MVT::i32, MVT::i64}, Custom);
setOperationAction(ISD::SELECT, MVT::i1, Promote);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
@@ -6840,6 +6841,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
return LowerGlobalAddress(MFI, Op, DAG);
}
+ case ISD::ExternalSymbol:
+ return LowerExternalSymbol(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_W_CHAIN:
@@ -9019,6 +9022,15 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
MachineMemOperand::MOInvariant);
}
+SDValue SITargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) const {
+ // TODO: Handle this. It should be mostly the same as LowerGlobalAddress.
+ const Function &Fn = DAG.getMachineFunction().getFunction();
+ DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
+ Fn, "unsupported external symbol", Op.getDebugLoc()));
+ return DAG.getPOISON(Op.getValueType());
+}
+
SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain,
const SDLoc &DL, SDValue V) const {
// We can't use S_MOV_B32 directly, because there is no way to specify m0 as
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index fb162948caf4c..e82f4528fcd09 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -79,6 +79,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+
SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
MVT VT, unsigned Offset) const;
SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 5c024252fdbe5..56908e39d59a5 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1951,7 +1951,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
// load). We also need to check WAW dependency with saved PC.
Wait = AMDGPU::Waitcnt();
- const auto &CallAddrOp = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ const MachineOperand &CallAddrOp = TII->getCalleeOperand(MI);
if (CallAddrOp.isReg()) {
RegInterval CallAddrOpInterval =
ScoreBrackets.getRegInterval(&MI, CallAddrOp);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6d2110957002a..e60b22ed567c2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10521,6 +10521,14 @@ unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return SchedModel.computeInstrLatency(&MI);
}
+const MachineOperand &
+SIInstrInfo::getCalleeOperand(const MachineInstr &MI) const {
+ if (const MachineOperand *CallAddrOp =
+ getNamedOperand(MI, AMDGPU::OpName::src0))
+ return *CallAddrOp;
+ return TargetInstrInfo::getCalleeOperand(MI);
+}
+
InstructionUniformity
SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index b1d6563bf3c0b..7d7cbda644b77 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1641,6 +1641,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
const MachineInstr &MI,
unsigned *PredCost = nullptr) const override;
+ const MachineOperand &getCalleeOperand(const MachineInstr &MI) const override;
+
InstructionUniformity
getInstructionUniformity(const MachineInstr &MI) const final;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.deoptimize.ll b/llvm/test/CodeGen/AMDGPU/llvm.deoptimize.ll
new file mode 100644
index 0000000000000..ef8c00bb3b4b0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.deoptimize.ll
@@ -0,0 +1,16 @@
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2> %t.err | FileCheck %s
+; RUN: FileCheck -check-prefix=ERR %s < %t.err
+
+; ERR: error: <unknown>:0:0: in function caller_0 i32 (): unsupported external symbol
+; ERR: error: unhandled statepoint-like instruction
+
+; CHECK: ;unsupported statepoint/stackmap/patchpoint
+declare i32 @llvm.experimental.deoptimize.i32(...)
+declare i8 @llvm.experimental.deoptimize.i8(...)
+
+define i32 @caller_0() {
+entry:
+ %v = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0) ]
+ ret i32 %v
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/statepoint-asm-printer.mir b/llvm/test/CodeGen/AMDGPU/statepoint-asm-printer.mir
new file mode 100644
index 0000000000000..1c9ae91557b7e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/statepoint-asm-printer.mir
@@ -0,0 +1,40 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=amdgpu-asm-printer -o - %s 2> %t.err | FileCheck %s
+# RUN: FileCheck -check-prefix=ERR %s < %t.err
+
+# CHECK: ;unsupported statepoint/stackmap/patchpoint
+# ERR: error: unhandled statepoint-like instruction
+
+---
+name: test_statepoint
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 16
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+ isCalleeSavedInfoValid: true
+stack:
+ - { id: 0, type: spill-slot, size: 4, alignment: 4 }
+machineFunctionInfo:
+ hasSpilledSGPRs: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0.entry:
+ liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+
+ S_WAITCNT 0
+ $sgpr16 = S_MOV_B32 $sgpr33
+ $sgpr33 = S_MOV_B32 $sgpr32
+ $sgpr18_sgpr19 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ $exec = S_MOV_B64 killed $sgpr18_sgpr19
+ $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
+ $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40
+ $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 1024, implicit-def dead $scc
+ $vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40
+ STATEPOINT 2882400015, 0, 11, undef renamable $sgpr4_sgpr5, 0, killed $sgpr4_sgpr5, killed $sgpr6_sgpr7, killed $sgpr8_sgpr9, killed $sgpr10_sgpr11, killed $sgpr12, killed $sgpr13, killed $sgpr14, killed $sgpr15, killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, 2, 0, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, csr_amdgpu
+...
+
diff --git a/llvm/test/CodeGen/AMDGPU/statepoint-insert-waitcnts.mir b/llvm/test/CodeGen/AMDGPU/statepoint-insert-waitcnts.mir
new file mode 100644
index 0000000000000..b0035e2e282f6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/statepoint-insert-waitcnts.mir
@@ -0,0 +1,64 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-insert-waitcnts -o - %s | FileCheck %s
+
+# Make sure the waitcnt pass doesn't crash on statepoint
+# pseudoinstructions, and handles the wait for the callee operand
+# correctly.
+
+---
+name: test_wait_statepoint_callee
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 16
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+ isCalleeSavedInfoValid: true
+stack:
+ - { id: 0, offset: 4, size: 4, alignment: 4 }
+ - { id: 1, type: spill-slot, size: 4, alignment: 4 }
+ - { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+machineFunctionInfo:
+ hasSpilledSGPRs: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ spillPhysVGPRs:
+ - '$vgpr40'
+ wwmReservedRegs:
+ - '$vgpr40'
+ scavengeFI: '%stack.0'
+body: |
+ bb.0:
+ liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13
+
+ ; CHECK-LABEL: name: test_wait_statepoint_callee
+ ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: $sgpr16 = S_MOV_B32 $sgpr33
+ ; CHECK-NEXT: $sgpr33 = S_MOV_B32 $sgpr32
+ ; CHECK-NEXT: $sgpr18_sgpr19 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr18_sgpr19
+ ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
+ ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40
+ ; CHECK-NEXT: $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 1024, implicit-def dead $scc
+ ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40
+ ; CHECK-NEXT: $sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
+ ; CHECK-NEXT: S_WAITCNT 49279
+ ; CHECK-NEXT: STATEPOINT 2882400015, 0, 11, renamable $sgpr14_sgpr15, 0, killed $sgpr4_sgpr5, killed $sgpr6_sgpr7, killed $sgpr8_sgpr9, killed $sgpr10_sgpr11, killed $sgpr12, killed $sgpr13, killed $sgpr14, killed $sgpr15, killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, 2, 0, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, csr_amdgpu
+ $sgpr16 = S_MOV_B32 $sgpr33
+ $sgpr33 = S_MOV_B32 $sgpr32
+ $sgpr18_sgpr19 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ $exec = S_MOV_B64 killed $sgpr18_sgpr19
+ $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
+ $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40
+ $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 1024, implicit-def dead $scc
+ $vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40
+ $sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
+ STATEPOINT 2882400015, 0, 11, renamable $sgpr14_sgpr15, 0, killed $sgpr4_sgpr5, killed $sgpr6_sgpr7, killed $sgpr8_sgpr9, killed $sgpr10_sgpr11, killed $sgpr12, killed $sgpr13, killed $sgpr14, killed $sgpr15, killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, 2, 0, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, csr_amdgpu
+
+...
More information about the llvm-commits
mailing list