[llvm] r364566 - AMDGPU: Make fixing i1 copies robust against re-ordering

Nicolai Haehnle via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 27 09:56:44 PDT 2019


Author: nha
Date: Thu Jun 27 09:56:44 2019
New Revision: 364566

URL: http://llvm.org/viewvc/llvm-project?rev=364566&view=rev
Log:
AMDGPU: Make fixing i1 copies robust against re-ordering

Summary:
Before this change, the new test case led to incorrect code being generated.

Change-Id: Ief48b227e97aa662dd3535c9bafb27d4a184efca

Reviewers: arsenm, david-salinas

Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63871

Added:
    llvm/trunk/test/CodeGen/AMDGPU/i1-copies-rpo.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SILowerI1Copies.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/SILowerI1Copies.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SILowerI1Copies.cpp?rev=364566&r1=364565&r2=364566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILowerI1Copies.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SILowerI1Copies.cpp Thu Jun 27 09:56:44 2019
@@ -95,6 +95,11 @@ private:
   MachineBasicBlock::iterator
   getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
 
+  bool isVreg1(unsigned Reg) const {
+    return TargetRegisterInfo::isVirtualRegister(Reg) &&
+           MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
+  }
+
   bool isLaneMaskReg(unsigned Reg) const {
     return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
            TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
@@ -494,13 +499,10 @@ void SILowerI1Copies::lowerCopiesFromI1(
 
       unsigned DstReg = MI.getOperand(0).getReg();
       unsigned SrcReg = MI.getOperand(1).getReg();
-      if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
-          MRI->getRegClass(SrcReg) != &AMDGPU::VReg_1RegClass)
+      if (!isVreg1(SrcReg))
         continue;
 
-      if (isLaneMaskReg(DstReg) ||
-          (TargetRegisterInfo::isVirtualRegister(DstReg) &&
-           MRI->getRegClass(DstReg) == &AMDGPU::VReg_1RegClass))
+      if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
         continue;
 
       // Copy into a 32-bit vector register.
@@ -543,7 +545,7 @@ void SILowerI1Copies::lowerPhis() {
 
     for (MachineInstr &MI : MBB.phis()) {
       unsigned DstReg = MI.getOperand(0).getReg();
-      if (MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+      if (!isVreg1(DstReg))
         continue;
 
       LLVM_DEBUG(dbgs() << "Lower PHI: " << MI);
@@ -560,7 +562,7 @@ void SILowerI1Copies::lowerPhis() {
 
         if (IncomingDef->getOpcode() == AMDGPU::COPY) {
           IncomingReg = IncomingDef->getOperand(1).getReg();
-          assert(isLaneMaskReg(IncomingReg));
+          assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
           assert(!IncomingDef->getOperand(1).getSubReg());
         } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
           continue;
@@ -668,8 +670,7 @@ void SILowerI1Copies::lowerCopiesToI1()
         continue;
 
       unsigned DstReg = MI.getOperand(0).getReg();
-      if (!TargetRegisterInfo::isVirtualRegister(DstReg) ||
-          MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+      if (!isVreg1(DstReg))
         continue;
 
       if (MRI->use_empty(DstReg)) {
@@ -689,7 +690,7 @@ void SILowerI1Copies::lowerCopiesToI1()
       assert(!MI.getOperand(1).getSubReg());
 
       if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
-          !isLaneMaskReg(SrcReg)) {
+          (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
         assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
         unsigned TmpReg = createLaneMaskReg(*MF);
         BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)

Added: llvm/trunk/test/CodeGen/AMDGPU/i1-copies-rpo.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/i1-copies-rpo.mir?rev=364566&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/i1-copies-rpo.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/i1-copies-rpo.mir Thu Jun 27 09:56:44 2019
@@ -0,0 +1,51 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs  -run-pass=si-i1-copies -o - %s | FileCheck %s
+
+# The strange block ordering visits the use before the def.
+---
+name: inserted_cmp_operand_class_rpo
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  ; CHECK-LABEL: name: inserted_cmp_operand_class_rpo
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   S_BRANCH %bb.3
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[COPY:%[0-9]+]]:sreg_64 = COPY %1
+  ; CHECK: bb.2:
+  ; CHECK:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]]
+  ; CHECK:   S_ENDPGM 0
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
+  ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV_B32_e32_1]], killed [[S_MOV_B32_]], implicit $exec
+  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_64 = COPY [[V_CMP_EQ_U32_e64_]]
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    successors: %bb.3
+
+    S_BRANCH %bb.3
+
+  bb.1:
+    successors: %bb.2
+
+    %0:vreg_1 = COPY %1
+
+  bb.2:
+    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %3:sreg_64_xexec = COPY %0
+    S_ENDPGM 0
+
+  bb.3:
+    successors: %bb.1
+
+    %4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %5:sreg_32_xm0 = S_MOV_B32 0
+    %6:sreg_64 = V_CMP_EQ_U32_e64 killed %4, killed %5, implicit $exec
+    %1:vreg_1 = COPY %6
+    S_BRANCH %bb.1




More information about the llvm-commits mailing list