[llvm] 987e528 - AMDGPU: Fix assert when trying to fold reg_sequence of physreg copies
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 21 18:58:29 PDT 2021
Author: Matt Arsenault
Date: 2021-04-21T21:58:18-04:00
New Revision: 987e52851e6432c432dd9b462b6d7e0a0777ecbd
URL: https://github.com/llvm/llvm-project/commit/987e52851e6432c432dd9b462b6d7e0a0777ecbd
DIFF: https://github.com/llvm/llvm-project/commit/987e52851e6432c432dd9b462b6d7e0a0777ecbd.diff
LOG: AMDGPU: Fix assert when trying to fold reg_sequence of physreg copies
Added:
llvm/test/CodeGen/AMDGPU/swdev282079.mir
Modified:
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 00a9ea445bcd4..f89bd759d2aaa 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -503,11 +503,11 @@ static bool getRegSeqInit(
for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
MachineOperand *Sub = &Def->getOperand(I);
- assert (Sub->isReg());
+ assert(Sub->isReg());
for (MachineInstr *SubDef = MRI.getVRegDef(Sub->getReg());
- SubDef && Sub->isReg() && !Sub->getSubReg() &&
- TII->isFoldableCopy(*SubDef);
+ SubDef && Sub->isReg() && Sub->getReg().isVirtual() &&
+ !Sub->getSubReg() && TII->isFoldableCopy(*SubDef);
SubDef = MRI.getVRegDef(Sub->getReg())) {
MachineOperand *Op = &SubDef->getOperand(1);
if (Op->isImm()) {
@@ -515,7 +515,7 @@ static bool getRegSeqInit(
Sub = Op;
break;
}
- if (!Op->isReg())
+ if (!Op->isReg() || Op->getReg().isPhysical())
break;
Sub = Op;
}
diff --git a/llvm/test/CodeGen/AMDGPU/swdev282079.mir b/llvm/test/CodeGen/AMDGPU/swdev282079.mir
new file mode 100644
index 0000000000000..820190cf943b5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/swdev282079.mir
@@ -0,0 +1,102 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-fold-operands -o - %s | FileCheck %s
+
+# This was attempting to look back through the REG_SEQUENCE source
+# operands and trying to look for physreg defs.
+
+---
+name: fold_reg_sequence_of_copy_from_physreg_0
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 8
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: fold_reg_sequence_of_copy_from_physreg_0
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+ ; CHECK: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; CHECK: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK: FLAT_STORE_DWORDX2 killed [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; CHECK: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+ S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vreg_64_align2 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
+ %3:vreg_64_align2 = IMPLICIT_DEF
+ FLAT_STORE_DWORDX2 killed %3, killed %2, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ S_ENDPGM 0
+
+...
+
+---
+name: fold_reg_sequence_of_copy_from_physreg_1
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 8
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: fold_reg_sequence_of_copy_from_physreg_1
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+ ; CHECK: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; CHECK: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK: FLAT_STORE_DWORDX2 killed [[REG_SEQUENCE]], killed [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; CHECK: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+ S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+ %2:vgpr_32 = COPY %0
+ %3:vreg_64_align2 = REG_SEQUENCE %0, %subreg.sub0, %2, %subreg.sub1
+ %4:vreg_64_align2 = IMPLICIT_DEF
+ FLAT_STORE_DWORDX2 killed %3, killed %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ S_ENDPGM 0
+
+...
+
+---
+name: fold_reg_sequence_of_copy_from_physreg_2
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 8
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: fold_reg_sequence_of_copy_from_physreg_2
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+ ; CHECK: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[COPY]], %subreg.sub1
+ ; CHECK: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK: FLAT_STORE_DWORDX2 killed [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ ; CHECK: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+ S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+ %0:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+ %1:vgpr_32 = COPY $vgpr0
+ %2:vreg_64_align2 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
+ %3:vreg_64_align2 = IMPLICIT_DEF
+ FLAT_STORE_DWORDX2 killed %3, killed %2, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+ S_ENDPGM 0
+
+...
More information about the llvm-commits mailing list