[llvm] 2247072 - AMDGPU/GlobalISel: Set insert point when emitting control flow pseudos
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 11 15:53:36 PDT 2020
Author: Matt Arsenault
Date: 2020-06-11T18:53:26-04:00
New Revision: 2247072b658bda4673221aee87347fdb75fbda6c
URL: https://github.com/llvm/llvm-project/commit/2247072b658bda4673221aee87347fdb75fbda6c
DIFF: https://github.com/llvm/llvm-project/commit/2247072b658bda4673221aee87347fdb75fbda6c.diff
LOG: AMDGPU/GlobalISel: Set insert point when emitting control flow pseudos
The legalizer implicitly assumed that the branch instruction immediately
followed the intrinsic being replaced by the pseudo. It's possible for
another non-terminator instruction to be inserted between the intrinsic and
the branch, so set the insertion point to just before the branch. Fixes a
non-terminator-after-terminator verifier error (which, without the verifier,
manifested itself as an infinite loop in analyzeBranch much later on).
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 83ebb0452474..7706b085a0be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4144,6 +4144,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
Register Use = MI.getOperand(3).getReg();
MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
+ B.setInsertPt(B.getMBB(), BrCond->getIterator());
if (IntrID == Intrinsic::amdgcn_if) {
B.buildInstr(AMDGPU::SI_IF)
.addDef(Def)
@@ -4184,6 +4185,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
Register Reg = MI.getOperand(2).getReg();
+
+ B.setInsertPt(B.getMBB(), BrCond->getIterator());
B.buildInstr(AMDGPU::SI_LOOP)
.addUse(Reg)
.addMBB(UncondBrTarget);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
index 52d44d2d08fb..068ad6780a42 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=WAVE64 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=WAVE32 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -O0 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
---
name: legal_brcond_vcc
@@ -310,3 +310,99 @@ body: |
bb.2:
...
+
+# There's another instruction between the intrinsic and the
+# conditional branch, so we need to move the insert point.
+---
+name: brcond_si_if_need_insert_terminator_point
+body: |
+ ; WAVE64-LABEL: name: brcond_si_if_need_insert_terminator_point
+ ; WAVE64: bb.0:
+ ; WAVE64: successors: %bb.1(0x80000000)
+ ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+ ; WAVE64: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; WAVE64: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE64: G_BR %bb.1
+ ; WAVE64: bb.1:
+ ; WAVE64: S_ENDPGM 0, implicit [[COPY2]](s32)
+ ; WAVE32-LABEL: name: brcond_si_if_need_insert_terminator_point
+ ; WAVE32: bb.0:
+ ; WAVE32: successors: %bb.1(0x80000000)
+ ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+ ; WAVE32: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; WAVE32: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE32: G_BR %bb.1
+ ; WAVE32: bb.1:
+ ; WAVE32: S_ENDPGM 0, implicit [[COPY2]](s32)
+ bb.0:
+ successors: %bb.1
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s1) = G_ICMP intpred(ne), %0, %1
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %5:_(s32) = COPY $vgpr2
+ G_BRCOND %3, %bb.1
+
+ bb.1:
+ S_ENDPGM 0, implicit %5
+...
+
+---
+name: brcond_si_loop_need_terminator_insert_point
+tracksRegLiveness: true
+body: |
+ ; WAVE64-LABEL: name: brcond_si_loop_need_terminator_insert_point
+ ; WAVE64: bb.0:
+ ; WAVE64: successors: %bb.1(0x80000000)
+ ; WAVE64: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+ ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
+ ; WAVE64: bb.1:
+ ; WAVE64: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; WAVE64: S_NOP 0
+ ; WAVE64: S_NOP 0
+ ; WAVE64: S_NOP 0
+ ; WAVE64: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE64: G_BR %bb.2
+ ; WAVE64: bb.2:
+ ; WAVE64: S_NOP 0
+ ; WAVE32-LABEL: name: brcond_si_loop_need_terminator_insert_point
+ ; WAVE32: bb.0:
+ ; WAVE32: successors: %bb.1(0x80000000)
+ ; WAVE32: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+ ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
+ ; WAVE32: bb.1:
+ ; WAVE32: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; WAVE32: S_NOP 0
+ ; WAVE32: S_NOP 0
+ ; WAVE32: S_NOP 0
+ ; WAVE32: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE32: G_BR %bb.2
+ ; WAVE32: bb.2:
+ ; WAVE32: S_NOP 0
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s64) = COPY $sgpr0_sgpr1
+
+ bb.1:
+ successors: %bb.1, %bb.2
+ S_NOP 0
+ %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+ S_NOP 0
+ S_NOP 0
+ G_BRCOND %3, %bb.2
+ G_BR %bb.1
+
+ bb.2:
+ S_NOP 0
+...
More information about the llvm-commits
mailing list