[llvm] r278676 - AMDGPU: Don't fold subregister extracts into tied operands
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 15 09:18:36 PDT 2016
Author: arsenm
Date: Mon Aug 15 11:18:36 2016
New Revision: 278676
URL: http://llvm.org/viewvc/llvm-project?rev=278676&view=rev
Log:
AMDGPU: Don't fold subregister extracts into tied operands
Modified:
llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/trunk/test/CodeGen/AMDGPU/operand-folding.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=278676&r1=278675&r2=278676&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Mon Aug 15 11:18:36 2016
@@ -197,9 +197,21 @@ static void foldOperand(MachineOperand &
const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
// FIXME: Fold operands with subregs.
- if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
- UseOp.isImplicit())) {
- return;
+ if (UseOp.isReg() && OpToFold.isReg()) {
+ if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
+ return;
+
+ // Don't fold subregister extracts into tied operands; only fold a full
+ // copy, since a subregister use tied to a full register def doesn't really
+ // make sense. e.g. don't fold:
+ //
+ // %vreg1 = COPY %vreg0:sub1
+ // %vreg2<tied3> = V_MAC_F32 %vreg3, %vreg4, %vreg1<tied0>
+ //
+ // into
+ // %vreg2<tied3> = V_MAC_F32 %vreg3, %vreg4, %vreg0:sub1<tied0>
+ if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
+ return;
}
bool FoldingImm = OpToFold.isImm();
Modified: llvm/trunk/test/CodeGen/AMDGPU/operand-folding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/operand-folding.ll?rev=278676&r1=278675&r2=278676&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/operand-folding.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/operand-folding.ll Mon Aug 15 11:18:36 2016
@@ -109,6 +109,21 @@ entry:
ret void
}
+; A subregister use operand should not be tied.
+; CHECK-LABEL: {{^}}no_fold_tied_subregister:
+; CHECK: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; CHECK: v_mac_f32_e32 v[[LO]], 0x41200000, v[[HI]]
+; CHECK: buffer_store_dword v[[LO]]
+define void @no_fold_tied_subregister() {
+ %tmp1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef
+ %tmp2 = extractelement <2 x float> %tmp1, i32 0
+ %tmp3 = extractelement <2 x float> %tmp1, i32 1
+ %tmp4 = fmul float %tmp3, 10.0
+ %tmp5 = fadd float %tmp4, %tmp2
+ store volatile float %tmp5, float addrspace(1)* undef
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }
More information about the llvm-commits mailing list