[llvm] 07cd19e - AMDGPU: Fix dropping MI flags when rewriting instructions
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed May 27 10:27:12 PDT 2020
Author: Matt Arsenault
Date: 2020-05-27T13:27:06-04:00
New Revision: 07cd19efa2a63b01aea9b516a7a003cb7f750a12
URL: https://github.com/llvm/llvm-project/commit/07cd19efa2a63b01aea9b516a7a003cb7f750a12
DIFF: https://github.com/llvm/llvm-project/commit/07cd19efa2a63b01aea9b516a7a003cb7f750a12.diff
LOG: AMDGPU: Fix dropping MI flags when rewriting instructions
All 3 passes that change instruction encodings were dropping MI
flags. This avoids scheduling regressions caused by setting
mayRaiseFPExceptions on FP instructions for non-strictfp functions.
Added:
llvm/test/CodeGen/AMDGPU/shrink-instructions-flags.mir
Modified:
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
llvm/test/CodeGen/AMDGPU/dpp_combine.mir
llvm/test/CodeGen/AMDGPU/sdwa-ops.mir
llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 10e2c3a263f1..1fa75504493b 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -168,7 +168,9 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
}
auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
- OrigMI.getDebugLoc(), TII->get(DPPOp));
+ OrigMI.getDebugLoc(), TII->get(DPPOp))
+ .setMIFlags(OrigMI.getFlags());
+
bool Fail = false;
do {
auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index fd1da238a876..06dd11fdbf61 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3272,7 +3272,8 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
unsigned Op32) const {
MachineBasicBlock *MBB = MI.getParent();;
MachineInstrBuilder Inst32 =
- BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32));
+ BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32))
+ .setMIFlags(MI.getFlags());
// Add the dst operand if the 32-bit encoding also has an explicit $vdst.
// For VOPC instructions, this is replaced by an implicit def of vcc.
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index c4f511abc4ae..9a1855c3458b 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -922,18 +922,24 @@ void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
if (I->modifiesRegister(AMDGPU::VCC, TRI))
return;
}
+
// Make the two new e32 instruction variants.
// Replace MI with V_{SUB|ADD}_I32_e32
- auto NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opc));
- NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst));
- NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
- NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1));
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opc))
+ .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
+ .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
+ .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
+ .setMIFlags(MI.getFlags());
+
MI.eraseFromParent();
+
// Replace MISucc with V_{SUBB|ADDC}_U32_e32
- auto NewInst = BuildMI(MBB, MISucc, MISucc.getDebugLoc(), TII->get(SuccOpc));
- NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::vdst));
- NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src0));
- NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src1));
+ BuildMI(MBB, MISucc, MISucc.getDebugLoc(), TII->get(SuccOpc))
+ .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::vdst))
+ .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src0))
+ .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src1))
+ .setMIFlags(MISucc.getFlags());
+
MISucc.eraseFromParent();
}
@@ -1010,7 +1016,8 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
// Create SDWA version of instruction MI and initialize its operands
MachineInstrBuilder SDWAInst =
- BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc)
+ .setMIFlags(MI.getFlags());
// Copy dst, if it is present in original then should also be present in SDWA
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index 0c4d0e0e2384..859c21d8842f 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -293,6 +293,8 @@ body: |
%19:vgpr_32 = V_ADD_I32_e32 5, %18, implicit-def $vcc, implicit $exec
...
+---
+
# check for floating point modifiers
# GCN-LABEL: name: add_f32_e64
# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
@@ -810,3 +812,24 @@ body: |
%4:sreg_64_xexec = IMPLICIT_DEF
%5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
...
+
+---
+
+# Make sure flags aren't dropped
+# GCN-LABEL: name: flags_add_f32_e64
+# GCN: %4:vgpr_32 = nnan nofpexcept V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $exec
+name: flags_add_f32_e64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = IMPLICIT_DEF
+
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $exec
+ S_ENDPGM 0, implicit %4
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir b/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir
index a759972d3a10..c181f51e747f 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir
@@ -3,8 +3,8 @@
# test for 3 consecutive _sdwa's
# GFX9-LABEL: name: test1_add_co_sdwa
-# GFX9: V_ADD_I32_sdwa
-# GFX9-NEXT: V_ADDC_U32_e32
+# GFX9: = nsw V_ADD_I32_sdwa
+# GFX9-NEXT: = nuw V_ADDC_U32_e32
# GFX9: V_ADD_I32_sdwa
# GFX9-NEXT: V_ADDC_U32_e32
# GFX9: V_ADD_I32_sdwa
@@ -26,8 +26,8 @@ body: |
%22:sreg_32_xm0 = S_MOV_B32 255
%23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
%30:vreg_64 = COPY $sgpr0_sgpr1
- %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec
- %64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
+ %63:vgpr_32, %65:sreg_64_xexec = nsw V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec
+ %64:vgpr_32, dead %66:sreg_64_xexec = nuw V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
index 3a3bda4a807a..8ba20b4a66dd 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
@@ -361,7 +361,6 @@ body: |
# GFX9: $vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, implicit-def $exec, implicit $exec
-
name: vopc_instructions
tracksRegLiveness: true
registers:
@@ -445,3 +444,27 @@ body: |
FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
+...
+
+# GCN-LABEL: name: preserve_flags
+# GCN: = nnan nofpexcept V_ADD_F32_sdwa 0, %4, 0, %4, 0, 0, 6, 0, 5, 1, implicit $exec
+
+---
+name: preserve_flags
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:sreg_32 = S_MOV_B32 65535
+ %2:vgpr_32 = V_LSHRREV_B32_e64 16, %0, implicit $exec
+ %3:vgpr_32 = V_AND_B32_e32 %1, %2, implicit $exec
+ %4:vgpr_32 = V_LSHLREV_B32_e64 16, %3, implicit $exec
+ %5:vgpr_32 = V_LSHRREV_B32_e64 16, %4, implicit $exec
+ %6:vgpr_32 = V_BFE_U32 %4, 8, 8, implicit $exec
+ %7:vgpr_32 = nnan nofpexcept V_ADD_F32_e32 %5, %6, implicit $mode, implicit $exec
+ S_ENDPGM 0, implicit %7
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-instructions-flags.mir b/llvm/test/CodeGen/AMDGPU/shrink-instructions-flags.mir
new file mode 100644
index 000000000000..b8c36bc77148
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/shrink-instructions-flags.mir
@@ -0,0 +1,24 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -run-pass=si-shrink-instructions %s -o - | FileCheck %s
+
+# Make sure flags are preserved when shrinking instructions
+---
+
+name: shrink_fadd_f32_flags
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: shrink_fadd_f32_flags
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK: %2:vgpr_32 = nnan nofpexcept V_ADD_F32_e32 [[COPY]], [[COPY1]], implicit $exec
+ ; CHECK: S_NOP 0
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr0
+ %2:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+ S_NOP 0
+
+...
More information about the llvm-commits
mailing list