[llvm] 987e528 - AMDGPU: Fix assert when trying to fold reg_sequence of physreg copies

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 21 18:58:29 PDT 2021


Author: Matt Arsenault
Date: 2021-04-21T21:58:18-04:00
New Revision: 987e52851e6432c432dd9b462b6d7e0a0777ecbd

URL: https://github.com/llvm/llvm-project/commit/987e52851e6432c432dd9b462b6d7e0a0777ecbd
DIFF: https://github.com/llvm/llvm-project/commit/987e52851e6432c432dd9b462b6d7e0a0777ecbd.diff

LOG: AMDGPU: Fix assert when trying to fold reg_sequence of physreg copies

Added: 
    llvm/test/CodeGen/AMDGPU/swdev282079.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 00a9ea445bcd4..f89bd759d2aaa 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -503,11 +503,11 @@ static bool getRegSeqInit(
 
   for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
     MachineOperand *Sub = &Def->getOperand(I);
-    assert (Sub->isReg());
+    assert(Sub->isReg());
 
     for (MachineInstr *SubDef = MRI.getVRegDef(Sub->getReg());
-         SubDef && Sub->isReg() && !Sub->getSubReg() &&
-         TII->isFoldableCopy(*SubDef);
+         SubDef && Sub->isReg() && Sub->getReg().isVirtual() &&
+         !Sub->getSubReg() && TII->isFoldableCopy(*SubDef);
          SubDef = MRI.getVRegDef(Sub->getReg())) {
       MachineOperand *Op = &SubDef->getOperand(1);
       if (Op->isImm()) {
@@ -515,7 +515,7 @@ static bool getRegSeqInit(
           Sub = Op;
         break;
       }
-      if (!Op->isReg())
+      if (!Op->isReg() || Op->getReg().isPhysical())
         break;
       Sub = Op;
     }

diff  --git a/llvm/test/CodeGen/AMDGPU/swdev282079.mir b/llvm/test/CodeGen/AMDGPU/swdev282079.mir
new file mode 100644
index 0000000000000..820190cf943b5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/swdev282079.mir
@@ -0,0 +1,102 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-fold-operands -o - %s | FileCheck %s
+
+# The fold used to look back through the REG_SEQUENCE source operands
+# and tried to look up defs of physical registers, which asserted.
+
+---
+name: fold_reg_sequence_of_copy_from_physreg_0
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  occupancy:       8
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_reg_sequence_of_copy_from_physreg_0
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    ; CHECK: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; CHECK: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; CHECK: FLAT_STORE_DWORDX2 killed [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    ; CHECK: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vreg_64_align2 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
+    %3:vreg_64_align2 = IMPLICIT_DEF
+    FLAT_STORE_DWORDX2 killed %3, killed %2, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    S_ENDPGM 0
+
+...
+
+---
+name: fold_reg_sequence_of_copy_from_physreg_1
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  occupancy:       8
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_reg_sequence_of_copy_from_physreg_1
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    ; CHECK: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; CHECK: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; CHECK: FLAT_STORE_DWORDX2 killed [[REG_SEQUENCE]], killed [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    ; CHECK: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+    %2:vgpr_32 = COPY %0
+    %3:vreg_64_align2 = REG_SEQUENCE %0, %subreg.sub0, %2, %subreg.sub1
+    %4:vreg_64_align2 = IMPLICIT_DEF
+    FLAT_STORE_DWORDX2 killed %3, killed %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    S_ENDPGM 0
+
+...
+
+---
+name: fold_reg_sequence_of_copy_from_physreg_2
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  occupancy:       8
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_reg_sequence_of_copy_from_physreg_2
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    ; CHECK: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[COPY]], %subreg.sub1
+    ; CHECK: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; CHECK: FLAT_STORE_DWORDX2 killed [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    ; CHECK: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1
+    %0:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+    %1:vgpr_32 = COPY $vgpr0
+    %2:vreg_64_align2 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
+    %3:vreg_64_align2 = IMPLICIT_DEF
+    FLAT_STORE_DWORDX2 killed %3, killed %2, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    S_ENDPGM 0
+
+...


        


More information about the llvm-commits mailing list