[llvm] c8f4c35 - AMDGPU: Correctly handle folding immediates into subregister use operands (#129664)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 4 10:06:15 PST 2025
Author: Matt Arsenault
Date: 2025-03-05T01:06:11+07:00
New Revision: c8f4c35a6624c23632fbca7f5f384655ef4811f0
URL: https://github.com/llvm/llvm-project/commit/c8f4c35a6624c23632fbca7f5f384655ef4811f0
DIFF: https://github.com/llvm/llvm-project/commit/c8f4c35a6624c23632fbca7f5f384655ef4811f0.diff
LOG: AMDGPU: Correctly handle folding immediates into subregister use operands (#129664)
This fixes a miscompile where a 64-bit materialize incorrectly folds
into a sub1 use operand.
We currently do not see many subregister use operands, and there are
also SIFoldOperands bugs that prevent this fold from appearing here.
This change pre-emptively fixes folding of 32-bit subregister uses of
64-bit materializes, in preparation for future patches.
The existing APIs are awkward, since they expect a fully formed
instruction whose operands can be folded, not a new value that still
needs to be created.
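
For context, the new path in tryToFoldACImm computes the 32-bit slice
of the 64-bit immediate selected by the use operand's subregister
index via SIInstrInfo::extractSubregFromImm. Below is a minimal
standalone sketch of the intended semantics; it is an illustration,
not the in-tree implementation, and the in-tree helper may handle
additional subregister indexes:

  #include <cstdint>
  #include <optional>

  // Hypothetical stand-ins for the AMDGPU subregister indexes.
  enum SubRegIdx { NoSubRegister = 0, sub0, sub1 };

  // sub0 selects the low 32 bits of a 64-bit immediate, sub1 the
  // high 32 bits; any other index gives up so no fold is attempted.
  // Before this patch, the full 64-bit value was folded even into a
  // sub1 use, which is the miscompile being fixed.
  std::optional<int64_t> extractSubregSketch(int64_t Imm,
                                             unsigned SubIdx) {
    switch (SubIdx) {
    case NoSubRegister:
      return Imm;
    case sub0:
      return static_cast<int32_t>(Imm);       // low half
    case sub1:
      return static_cast<int32_t>(Imm >> 32); // high half
    default:
      return std::nullopt;
    }
  }

For example, the 64-bit immediate 0x700000003 yields 3 through sub0
and 7 through sub1, matching the updated CHECK lines in the test
below.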
Added:
Modified:
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 3a3f303293461..2e66796bcb6bc 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -50,6 +50,11 @@ struct FoldCandidate {
}
}
+ FoldCandidate(MachineInstr *MI, unsigned OpNo, int64_t FoldImm,
+ bool Commuted_ = false, int ShrinkOp = -1)
+ : UseMI(MI), ImmToFold(FoldImm), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
+ Kind(MachineOperand::MO_Immediate), Commuted(Commuted_) {}
+
bool isFI() const {
return Kind == MachineOperand::MO_FrameIndex;
}
@@ -578,16 +583,29 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
}
static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
- MachineInstr *MI, unsigned OpNo,
- MachineOperand *FoldOp, bool Commuted = false,
- int ShrinkOp = -1) {
+ FoldCandidate &&Entry) {
// Skip additional folding on the same operand.
for (FoldCandidate &Fold : FoldList)
- if (Fold.UseMI == MI && Fold.UseOpNo == OpNo)
+ if (Fold.UseMI == Entry.UseMI && Fold.UseOpNo == Entry.UseOpNo)
return;
- LLVM_DEBUG(dbgs() << "Append " << (Commuted ? "commuted" : "normal")
- << " operand " << OpNo << "\n " << *MI);
- FoldList.emplace_back(MI, OpNo, FoldOp, Commuted, ShrinkOp);
+ LLVM_DEBUG(dbgs() << "Append " << (Entry.Commuted ? "commuted" : "normal")
+ << " operand " << Entry.UseOpNo << "\n " << *Entry.UseMI);
+ FoldList.push_back(Entry);
+}
+
+static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
+ MachineInstr *MI, unsigned OpNo,
+ MachineOperand *FoldOp, bool Commuted = false,
+ int ShrinkOp = -1) {
+ appendFoldCandidate(FoldList,
+ FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
+}
+
+static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
+ MachineInstr *MI, unsigned OpNo, int64_t ImmVal,
+ bool Commuted = false, int ShrinkOp = -1) {
+ appendFoldCandidate(FoldList,
+ FoldCandidate(MI, OpNo, ImmVal, Commuted, ShrinkOp));
}
bool SIFoldOperandsImpl::tryAddToFoldList(
@@ -847,11 +865,35 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
return false;
uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;
- if (OpToFold.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
- appendFoldCandidate(FoldList, UseMI, UseOpIdx, &OpToFold);
- return true;
+ MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
+ if (OpToFold.isImm()) {
+ if (unsigned UseSubReg = UseOp.getSubReg()) {
+ std::optional<int64_t> SubImm =
+ SIInstrInfo::extractSubregFromImm(OpToFold.getImm(), UseSubReg);
+ if (!SubImm)
+ return false;
+
+ // TODO: Avoid the temporary MachineOperand
+ MachineOperand TmpOp = MachineOperand::CreateImm(*SubImm);
+ if (TII->isOperandLegal(*UseMI, UseOpIdx, &TmpOp)) {
+ appendFoldCandidate(FoldList, UseMI, UseOpIdx, *SubImm);
+ return true;
+ }
+
+ return false;
+ }
+
+ if (TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
+ appendFoldCandidate(FoldList, UseMI, UseOpIdx, &OpToFold);
+ return true;
+ }
}
+ // TODO: Verify the following code handles subregisters correctly.
+ // TODO: Handle extract of global reference
+ if (UseOp.getSubReg())
+ return false;
+
if (!OpToFold.isReg())
return false;
@@ -861,8 +903,7 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
// Maybe it is just a COPY of an immediate itself.
MachineInstr *Def = MRI->getVRegDef(UseReg);
- MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
- if (!UseOp.getSubReg() && Def && TII->isFoldableCopy(*Def)) {
+ if (Def && TII->isFoldableCopy(*Def)) {
MachineOperand &DefOp = Def->getOperand(1);
if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
appendFoldCandidate(FoldList, UseMI, UseOpIdx, &DefOp);
diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir
index 591bda2b22f12..2389477bb72a8 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir
@@ -17,7 +17,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[COPY]].sub1, %subreg.sub1
; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[REG_SEQUENCE]].sub0, 8, implicit-def $scc
- ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[REG_SEQUENCE]].sub1, 8, implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[REG_SEQUENCE]].sub1, 0, implicit-def $scc, implicit $scc
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
%0:sgpr_64 = COPY $sgpr8_sgpr9
@@ -42,8 +42,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[COPY]].sub1, %subreg.sub1
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[REG_SEQUENCE]].sub0, 30064771075, 0, implicit $exec
- ; CHECK-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[REG_SEQUENCE]].sub1, 30064771075, [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[REG_SEQUENCE]].sub0, 3, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[REG_SEQUENCE]].sub1, 7, [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
%0:vreg_64 = COPY $vgpr8_vgpr9
@@ -116,7 +116,7 @@ body: |
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 8
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B64_]].sub1, %subreg.sub0, [[S_MOV_B64_]].sub0, %subreg.sub1
; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, 8, implicit-def $scc
- ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]].sub1, 8, implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]].sub1, 0, implicit-def $scc, implicit $scc
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADDC_U32_]]
%0:sgpr_64 = COPY $sgpr8_sgpr9
%1:sreg_64 = S_MOV_B64 8
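The updated CHECK lines show the fix in action: 30064771075 is
0x700000003, so the fold through sub0 now produces 3 and the fold
through sub1 produces 7, where the pre-fix output reused the full
64-bit value for both halves. Likewise, S_MOV_B64 8 has 0 in its high
half, so the S_ADDC_U32 uses of sub1 now fold to 0 instead of 8.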