[llvm] b2a65f0 - [AMDGPU] Skip additional folding on the same operand.
Michael Liao via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 24 08:30:37 PDT 2019
Author: Michael Liao
Date: 2019-10-24T11:30:22-04:00
New Revision: b2a65f0d70f529ce52004934867461fa5329da63
URL: https://github.com/llvm/llvm-project/commit/b2a65f0d70f529ce52004934867461fa5329da63
DIFF: https://github.com/llvm/llvm-project/commit/b2a65f0d70f529ce52004934867461fa5329da63.diff
LOG: [AMDGPU] Skip additional folding on the same operand.
Reviewers: rampitec, arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69355
Added:
Modified:
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
llvm/test/CodeGen/AMDGPU/operand-folding.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a0e43101a7fc..7cf5f802c099 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -312,6 +312,19 @@ static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
return false;
}
+static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
+ MachineInstr *MI, unsigned OpNo,
+ MachineOperand *FoldOp, bool Commuted = false,
+ int ShrinkOp = -1) {
+ // Skip additional folding on the same operand.
+ for (FoldCandidate &Fold : FoldList)
+ if (Fold.UseMI == MI && Fold.UseOpNo == OpNo)
+ return;
+ LLVM_DEBUG(dbgs() << "Append " << (Commuted ? "commuted" : "normal")
+ << " operand " << OpNo << "\n " << *MI << '\n');
+ FoldList.push_back(FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
+}
+
static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
MachineOperand *OpToFold,
@@ -344,7 +357,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
// Special case for s_setreg_b32
if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
- FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
+ appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
return true;
}
@@ -403,8 +416,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
unsigned MaybeCommutedOpc = MI->getOpcode();
int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
- FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
- Op32));
+ appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
return true;
}
@@ -412,11 +424,11 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
return false;
}
- FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
+ appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true);
return true;
}
- FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
+ appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
return true;
}
@@ -494,7 +506,7 @@ static bool tryToFoldACImm(const SIInstrInfo *TII,
if (!TII->isOperandLegal(*UseMI, UseOpIdx, Op))
return false;
- FoldList.push_back(FoldCandidate(UseMI, UseOpIdx, Op));
+ appendFoldCandidate(FoldList, UseMI, UseOpIdx, Op);
return true;
}
@@ -1398,5 +1410,5 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
foldInstOperand(MI, OpToFold);
}
}
- return false;
+ return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index b2521bb74500..86a634b81013 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -22,3 +22,21 @@ body: |
%9:vgpr_32 = COPY %8
%10:vgpr_32 = V_AND_B32_e32 %7, %9, implicit $exec
...
+
+---
+# GCN-LABEL: name: no_extra_fold_on_same_opnd
+# The first XOR needs commuting to fold that immediate operand.
+# GCN: V_XOR_B32_e32 {{.*}} 0, %1
+# GCN: V_XOR_B32_e32 %2, %4.sub0
+name: no_extra_fold_on_same_opnd
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32 = IMPLICIT_DEF
+ %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %4:vreg_64 = REG_SEQUENCE killed %0, %subreg.sub0, killed %3, %subreg.sub1
+ %5:vgpr_32 = V_XOR_B32_e32 %1, %4.sub1, implicit $exec
+ %6:vgpr_32 = V_XOR_B32_e32 %2, %4.sub0, implicit $exec
+...
diff --git a/llvm/test/CodeGen/AMDGPU/operand-folding.ll b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
index 3836a2b7e599..9c99a01eedd9 100644
--- a/llvm/test/CodeGen/AMDGPU/operand-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
@@ -124,6 +124,30 @@ define amdgpu_kernel void @no_fold_tied_subregister() {
ret void
}
+; There should be exactly one folding on the same operand.
+; CHECK-LABEL: {{^}}no_extra_fold_on_same_opnd
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @no_extra_fold_on_same_opnd() {
+entry:
+ %s0 = load i32, i32 addrspace(5)* undef, align 4
+ %s0.i64= zext i32 %s0 to i64
+ br label %for.body.i.i
+
+for.body.i.i:
+ %s1 = load i32, i32 addrspace(1)* undef, align 8
+ %s1.i64 = sext i32 %s1 to i64
+ %xor = xor i64 %s1.i64, %s0.i64
+ %flag = icmp ult i64 %xor, 8
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ unreachable
+
+if.else:
+ unreachable
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }
More information about the llvm-commits
mailing list