[llvm-branch-commits] [llvm] AMDGPU: Handle subregister uses in SIFoldOperands constant folding (PR #127485)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Feb 17 04:25:12 PST 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/127485
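Editorial note on the change: it converts SIFoldOperandsImpl::getImmOrMaterializedImm from returning a MachineOperand * to returning std::optional<int64_t>. A pointer into the defining instruction's operand cannot represent "the 32-bit half of this 64-bit immediate selected by the use's subregister index", but a plain value can. Subregister uses of a materialized immediate are now routed through TII->extractSubregFromImm, whose implementation is not part of this diff. Below is a minimal standalone sketch of the semantics assumed here; the enum values are illustrative stand-ins, not the real generated AMDGPU subregister indices.

// Sketch only: presumed behavior of SIInstrInfo::extractSubregFromImm.
// Select the 32-bit half of a 64-bit immediate named by the subregister
// index, returned sign-extended to int64_t; unknown indices yield
// std::nullopt so the caller simply declines to fold.
#include <cassert>
#include <cstdint>
#include <optional>

enum SubRegIdx { NoSubRegister, sub0, sub1 }; // illustrative stand-ins

static std::optional<int64_t> extractSubregFromImm(int64_t Imm,
                                                   unsigned SubReg) {
  switch (SubReg) {
  case NoSubRegister: // No subregister: the whole immediate is used.
    return Imm;
  case sub0: // Low 32 bits, sign-extended back to 64 bits.
    return static_cast<int32_t>(Imm);
  case sub1: // High 32 bits, sign-extended back to 64 bits.
    return static_cast<int32_t>(static_cast<uint64_t>(Imm) >> 32);
  default: // Unhandled index: give up on folding.
    return std::nullopt;
  }
}

int main() {
  // Values from the S_MOV_B64 32 test below: sub0 reads 32, sub1 reads 0.
  assert(*extractSubregFromImm(32, sub0) == 32);
  assert(*extractSubregFromImm(32, sub1) == 0);
  // 0xFFFFFFFF0FFFFFFF from the V_MOV_B64_PSEUDO test: 268435455 and -1.
  int64_t Imm = static_cast<int64_t>(0xFFFFFFFF0FFFFFFFULL);
  assert(*extractSubregFromImm(Imm, sub0) == 268435455);
  assert(*extractSubregFromImm(Imm, sub1) == -1);
}

With that in place, the callers (tryConstantFoldOp, tryFoldCndMask, tryFoldZeroHighBits) test and dereference the optional instead of calling isImm()/getImm() on whatever operand came back.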
From 329164156b1faed5ec7b77c217961d2669f006ac Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 17 Feb 2025 17:18:27 +0700
Subject: [PATCH] AMDGPU: Handle subregister uses in SIFoldOperands constant
folding
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 57 +++++++++++--------
 .../AMDGPU/constant-fold-imm-immreg.mir   | 34 +++++++++++
 2 files changed, 67 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index d8f3f9c54abc1..30242c461768c 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -123,7 +123,7 @@ class SIFoldOperandsImpl {
                    SmallVectorImpl<FoldCandidate> &FoldList,
                    SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
 
-  MachineOperand *getImmOrMaterializedImm(MachineOperand &Op) const;
+  std::optional<int64_t> getImmOrMaterializedImm(MachineOperand &Op) const;
   bool tryConstantFoldOp(MachineInstr *MI) const;
   bool tryFoldCndMask(MachineInstr &MI) const;
   bool tryFoldZeroHighBits(MachineInstr &MI) const;
@@ -1293,21 +1293,22 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
     MI.removeOperand(I);
 }
 
-MachineOperand *
+std::optional<int64_t>
 SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const {
-  // If this has a subregister, it obviously is a register source.
-  if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
-      !Op.getReg().isVirtual())
-    return &Op;
+  if (Op.isImm())
+    return Op.getImm();
 
-  MachineInstr *Def = MRI->getVRegDef(Op.getReg());
+  if (!Op.isReg() || !Op.getReg().isVirtual())
+    return std::nullopt;
+
+  const MachineInstr *Def = MRI->getVRegDef(Op.getReg());
   if (Def && Def->isMoveImmediate()) {
-    MachineOperand &ImmSrc = Def->getOperand(1);
+    const MachineOperand &ImmSrc = Def->getOperand(1);
     if (ImmSrc.isImm())
-      return &ImmSrc;
+      return TII->extractSubregFromImm(ImmSrc.getImm(), Op.getSubReg());
   }
 
-  return &Op;
+  return std::nullopt;
 }
 
 // Try to simplify operations with a constant that may appear after instruction
@@ -1322,12 +1323,14 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
   if (Src0Idx == -1)
     return false;
-  MachineOperand *Src0 = getImmOrMaterializedImm(MI->getOperand(Src0Idx));
+
+  MachineOperand *Src0 = &MI->getOperand(Src0Idx);
+  std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);
 
   if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
        Opc == AMDGPU::S_NOT_B32) &&
-      Src0->isImm()) {
-    MI->getOperand(1).ChangeToImmediate(~Src0->getImm());
+      Src0Imm) {
+    MI->getOperand(1).ChangeToImmediate(~*Src0Imm);
     mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
     return true;
   }
@@ -1335,17 +1338,19 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
   int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
   if (Src1Idx == -1)
     return false;
-  MachineOperand *Src1 = getImmOrMaterializedImm(MI->getOperand(Src1Idx));
-  if (!Src0->isImm() && !Src1->isImm())
+  MachineOperand *Src1 = &MI->getOperand(Src1Idx);
+  std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);
+
+  if (!Src0Imm && !Src1Imm)
     return false;
 
   // and k0, k1 -> v_mov_b32 (k0 & k1)
   // or k0, k1 -> v_mov_b32 (k0 | k1)
   // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
-  if (Src0->isImm() && Src1->isImm()) {
+  if (Src0Imm && Src1Imm) {
     int32_t NewImm;
-    if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
+    if (!evalBinaryInstruction(Opc, NewImm, *Src0Imm, *Src1Imm))
       return false;
 
     bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg());
@@ -1361,12 +1366,13 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
   if (!MI->isCommutable())
     return false;
 
-  if (Src0->isImm() && !Src1->isImm()) {
+  if (Src0Imm && !Src1Imm) {
     std::swap(Src0, Src1);
     std::swap(Src0Idx, Src1Idx);
+    std::swap(Src0Imm, Src1Imm);
   }
 
-  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
+  int32_t Src1Val = static_cast<int32_t>(*Src1Imm);
   if (Opc == AMDGPU::V_OR_B32_e64 ||
       Opc == AMDGPU::V_OR_B32_e32 ||
       Opc == AMDGPU::S_OR_B32) {
@@ -1423,9 +1429,12 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
   MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
   MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
   if (!Src1->isIdenticalTo(*Src0)) {
-    auto *Src0Imm = getImmOrMaterializedImm(*Src0);
-    auto *Src1Imm = getImmOrMaterializedImm(*Src1);
-    if (!Src1Imm->isIdenticalTo(*Src0Imm))
+    std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);
+    if (!Src1Imm)
+      return false;
+
+    std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);
+    if (!Src0Imm || *Src0Imm != *Src1Imm)
       return false;
   }
 
@@ -1458,8 +1467,8 @@ bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
       MI.getOpcode() != AMDGPU::V_AND_B32_e32)
     return false;
 
-  MachineOperand *Src0 = getImmOrMaterializedImm(MI.getOperand(1));
-  if (!Src0->isImm() || Src0->getImm() != 0xffff)
+  std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
+  if (!Src0Imm || *Src0Imm != 0xffff)
     return false;
 
   Register Src1 = MI.getOperand(2).getReg();
diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
index 3db2b6ed9ab4b..56bcce968bdf2 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
@@ -926,3 +926,37 @@ body:             |
     S_ENDPGM 0, implicit %3
 
 ...
+
+---
+name:            constant_s_xor_b32_uses_subreg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: constant_s_xor_b32_uses_subreg
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+    ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]]
+    %0:sreg_64 = S_MOV_B64 32
+    %1:sreg_64 = S_MOV_B64 15
+    %2:sgpr_32 = S_XOR_B32 %0.sub0, %1.sub0, implicit-def dead $scc
+    %3:sgpr_32 = S_XOR_B32 %0.sub1, %1.sub1, implicit-def dead $scc
+    S_ENDPGM 0, implicit %2, implicit %3
+
+...
+
+---
+name:            constant_v_or_b32_uses_subreg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: constant_v_or_b32_uses_subreg
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 268435455, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]]
+    %0:vreg_64 = V_MOV_B64_PSEUDO 18446744069683019775, implicit $exec
+    %1:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+    %2:vgpr_32 = V_OR_B32_e32 %0.sub0, %1.sub0, implicit $exec
+    %3:vgpr_32 = V_OR_B32_e32 %0.sub1, %1.sub1, implicit $exec
+    S_ENDPGM 0, implicit %2, implicit %3
+
+...