[llvm] r374041 - AMDGPU: Propagate undef flag during pre-RA exec mask optimizations
Nicolai Haehnle via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 8 05:46:32 PDT 2019
Author: nha
Date: Tue Oct 8 05:46:32 2019
New Revision: 374041
URL: http://llvm.org/viewvc/llvm-project?rev=374041&view=rev
Log:
AMDGPU: Propagate undef flag during pre-RA exec mask optimizations
Summary: Issue: https://github.com/GPUOpen-Drivers/llpc/issues/204
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68184
Modified:
llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
llvm/trunk/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir
Modified: llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp?rev=374041&r1=374040&r2=374041&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp Tue Oct 8 05:46:32 2019
@@ -250,15 +250,16 @@ static unsigned optimizeVcndVcmpPair(Mac
Op1->getImm() != 0 || Op2->getImm() != 1)
return AMDGPU::NoRegister;
- LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t'
- << *Cmp << '\t' << *And);
+ LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
+ << *And);
Register CCReg = CC->getReg();
LIS->RemoveMachineInstrFromMaps(*And);
- MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(),
- TII->get(Andn2Opc), And->getOperand(0).getReg())
- .addReg(ExecReg)
- .addReg(CCReg, 0, CC->getSubReg());
+ MachineInstr *Andn2 =
+ BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc),
+ And->getOperand(0).getReg())
+ .addReg(ExecReg)
+ .addReg(CCReg, getUndefRegState(CC->isUndef()), CC->getSubReg());
And->eraseFromParent();
LIS->InsertMachineInstrInMaps(*Andn2);
Modified: llvm/trunk/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir?rev=374041&r1=374040&r2=374041&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir Tue Oct 8 05:46:32 2019
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -run-pass=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -run-pass=si-optimize-exec-masking-pre-ra -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# Check for regression from assuming an instruction was a copy after
# dropping the opcode check.
@@ -95,3 +95,26 @@ body: |
$exec = S_OR_B64 $exec, %7, implicit-def $scc
...
+
+# When folding a v_cndmask and a v_cmp in a pattern leading to
+# s_cbranch_vccz, ensure that an undef operand is handled correctly.
+---
+name: cndmask_cmp_cbranch_fold_undef
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: name: cndmask_cmp_cbranch_fold_undef
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x80000000)
+ ; GCN: $vcc = S_ANDN2_B64 $exec, undef %1:sreg_64_xexec, implicit-def $scc
+ ; GCN: S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ ; GCN: bb.1:
+ bb.0:
+
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %0:sreg_64_xexec, implicit $exec
+ V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
+ $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.1, implicit $vcc
+
+ bb.1:
+
+...
More information about the llvm-commits mailing list