[llvm] r273916 - AMDGPU: Fix verifier errors with undef vector indices

Matt Arsenault via llvm-commits <llvm-commits at lists.llvm.org>
Mon Jun 27 12:57:44 PDT 2016


Author: arsenm
Date: Mon Jun 27 14:57:44 2016
New Revision: 273916

URL: http://llvm.org/viewvc/llvm-project?rev=273916&view=rev
Log:
AMDGPU: Fix verifier errors with undef vector indices

Also fix pointlessly adding exec to liveins.
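
Most of the mechanical change below follows one idiom: pass the whole
MachineOperand around instead of just its register number, and carry the
operand's undef flag along when rebuilding uses. A minimal sketch of the
pattern (using the existing MachineInstrBuilder / getUndefRegState API;
MBB, I, DL, TII, and the Idx operand are assumed from the pass context):

    // Copying only the register drops the undef flag, so the verifier
    // sees a use of a physical register with no reaching definition.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addReg(Idx->getReg());

    // getUndefRegState() turns the operand's undef bit back into
    // RegState::Undef on the rebuilt use, which the verifier accepts.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addReg(Idx->getReg(), getUndefRegState(Idx->isUndef()));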

Added:
    llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
    llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp?rev=273916&r1=273915&r2=273916&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp Mon Jun 27 14:57:44 2016
@@ -94,10 +94,12 @@ private:
                          MachineBasicBlock &LoopBB,
                          MachineBasicBlock &RemainderBB,
                          unsigned SaveReg,
-                         unsigned IdxReg);
+                         const MachineOperand &IdxReg);
 
   void emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB, DebugLoc DL,
-                              MachineInstr *MovRel, unsigned IdxReg, int Offset);
+                              MachineInstr *MovRel,
+                              const MachineOperand &IdxReg,
+                              int Offset);
 
   bool loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
   void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset);
@@ -386,7 +388,7 @@ void SILowerControlFlow::splitBlockLiveI
                                            MachineBasicBlock &LoopBB,
                                            MachineBasicBlock &RemainderBB,
                                            unsigned SaveReg,
-                                           unsigned IdxReg) {
+                                           const MachineOperand &IdxReg) {
   LivePhysRegs RemainderLiveRegs(TRI);
 
   RemainderLiveRegs.addLiveOuts(MBB);
@@ -399,29 +401,38 @@ void SILowerControlFlow::splitBlockLiveI
   RemainderLiveRegs.addReg(SaveReg);
 
   if (const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val)) {
-    RemainderLiveRegs.addReg(Val->getReg());
-    LoopBB.addLiveIn(Val->getReg());
+    if (!Val->isUndef()) {
+      RemainderLiveRegs.addReg(Val->getReg());
+      LoopBB.addLiveIn(Val->getReg());
+    }
+  }
+
+  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  for (unsigned Reg : RemainderLiveRegs) {
+    if (MRI.isAllocatable(Reg))
+      RemainderBB.addLiveIn(Reg);
   }
 
-  for (unsigned Reg : RemainderLiveRegs)
-    RemainderBB.addLiveIn(Reg);
 
-  unsigned SrcReg = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg();
-  LoopBB.addLiveIn(SrcReg);
-  LoopBB.addLiveIn(IdxReg);
+  const MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src);
+  if (!Src->isUndef())
+    LoopBB.addLiveIn(Src->getReg());
+
+  if (!IdxReg.isUndef())
+    LoopBB.addLiveIn(IdxReg.getReg());
   LoopBB.sortUniqueLiveIns();
 }
 
 void SILowerControlFlow::emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB,
                                                 DebugLoc DL,
                                                 MachineInstr *MovRel,
-                                                unsigned IdxReg,
+                                                const MachineOperand &IdxReg,
                                                 int Offset) {
   MachineBasicBlock::iterator I = LoopBB.begin();
 
   // Read the next variant into VCC (lower 32 bits) <- also loop target
   BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), AMDGPU::VCC_LO)
-    .addReg(IdxReg);
+    .addReg(IdxReg.getReg(), getUndefRegState(IdxReg.isUndef()));
 
   // Move index from VCC into M0
   BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
@@ -430,7 +441,7 @@ void SILowerControlFlow::emitLoadM0FromV
   // Compare the just read M0 value to all possible Idx values
   BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32))
     .addReg(AMDGPU::M0)
-    .addReg(IdxReg);
+    .addReg(IdxReg.getReg(), getUndefRegState(IdxReg.isUndef()));
 
   // Update EXEC, save the original EXEC value to VCC
   BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
@@ -461,16 +472,16 @@ bool SILowerControlFlow::loadM0(MachineI
   DebugLoc DL = MI.getDebugLoc();
   MachineBasicBlock::iterator I(&MI);
 
-  unsigned Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx)->getReg();
+  const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
 
-  if (AMDGPU::SReg_32RegClass.contains(Idx)) {
+  if (AMDGPU::SReg_32RegClass.contains(Idx->getReg())) {
     if (Offset) {
       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
-        .addReg(Idx)
+        .addReg(Idx->getReg(), getUndefRegState(Idx->isUndef()))
         .addImm(Offset);
     } else {
       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
-        .addReg(Idx);
+        .addReg(Idx->getReg(), getUndefRegState(Idx->isUndef()));
     }
 
     MBB.insert(I, MovRel);
@@ -485,7 +496,7 @@ bool SILowerControlFlow::loadM0(MachineI
 
   // Reading from a VGPR requires looping over all workitems in the wavefront.
   assert(AMDGPU::SReg_64RegClass.contains(Save) &&
-         AMDGPU::VGPR_32RegClass.contains(Idx));
+         AMDGPU::VGPR_32RegClass.contains(Idx->getReg()));
 
   // Save the EXEC mask
   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64), Save)
@@ -504,14 +515,13 @@ bool SILowerControlFlow::loadM0(MachineI
   LoopBB->addSuccessor(LoopBB);
   LoopBB->addSuccessor(RemainderBB);
 
-  if (TRI->trackLivenessAfterRegAlloc(MF))
-    splitBlockLiveIns(MBB, MI, *LoopBB, *RemainderBB, Save, Idx);
+  splitBlockLiveIns(MBB, MI, *LoopBB, *RemainderBB, Save, *Idx);
 
   // Move the rest of the block into a new block.
   RemainderBB->transferSuccessors(&MBB);
   RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
 
-  emitLoadM0FromVGPRLoop(*LoopBB, DL, MovRel, Idx, Offset);
+  emitLoadM0FromVGPRLoop(*LoopBB, DL, MovRel, *Idx, Offset);
 
   MachineBasicBlock::iterator First = RemainderBB->begin();
   BuildMI(*RemainderBB, First, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
@@ -554,16 +564,16 @@ bool SILowerControlFlow::indirectSrc(Mac
   DebugLoc DL = MI.getDebugLoc();
 
   unsigned Dst = MI.getOperand(0).getReg();
-  unsigned Vec = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg();
+  const MachineOperand *SrcVec = TII->getNamedOperand(MI, AMDGPU::OpName::src);
   int Off = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
   unsigned Reg;
 
-  computeIndirectRegAndOffset(Vec, Reg, Off);
+  computeIndirectRegAndOffset(SrcVec->getReg(), Reg, Off);
 
   MachineInstr *MovRel =
     BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
-    .addReg(Reg)
-    .addReg(Vec, RegState::Implicit);
+    .addReg(Reg, getUndefRegState(SrcVec->isUndef()))
+    .addReg(SrcVec->getReg(), RegState::Implicit);
 
   return loadM0(MI, MovRel, Off);
 }
@@ -575,7 +585,7 @@ bool SILowerControlFlow::indirectDst(Mac
 
   unsigned Dst = MI.getOperand(0).getReg();
   int Off = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
-  unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::val)->getReg();
+  MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val);
   unsigned Reg;
 
   computeIndirectRegAndOffset(Dst, Reg, Off);
@@ -583,7 +593,7 @@ bool SILowerControlFlow::indirectDst(Mac
   MachineInstr *MovRel =
     BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
     .addReg(Reg, RegState::Define)
-    .addReg(Val)
+    .addReg(Val->getReg(), getUndefRegState(Val->isUndef()))
     .addReg(Dst, RegState::Implicit);
 
   return loadM0(MI, MovRel, Off);
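
The livein half of the fix is the new MachineRegisterInfo::isAllocatable()
filter in splitBlockLiveIns() above: the live-out set computed for the
split block can include reserved registers such as EXEC, and recording
those as block liveins is pointless. Restated standalone (a sketch;
RemainderLiveRegs and the blocks are assumed from the pass context):

    // Record only allocatable (i.e. non-reserved) physical registers as
    // liveins; reserved registers such as EXEC are skipped.
    const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    for (unsigned Reg : RemainderLiveRegs) {
      if (MRI.isAllocatable(Reg))
        RemainderBB.addLiveIn(Reg);
    }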

Modified: llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll?rev=273916&r1=273915&r2=273916&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll Mon Jun 27 14:57:44 2016
@@ -94,6 +94,27 @@ entry:
   ret void
 }
 
+; CHECK-LABEL: {{^}}extract_undef_offset_sgpr:
+define void @extract_undef_offset_sgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+entry:
+  %ld = load volatile <4 x i32>, <4  x i32> addrspace(1)* %in
+  %value = extractelement <4 x i32> %ld, i32 undef
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}insert_undef_offset_sgpr_vector_src:
+; CHECK: buffer_load_dwordx4
+; CHECK: s_mov_b32 m0,
+; CHECK-NEXT: v_movreld_b32
+define void @insert_undef_offset_sgpr_vector_src(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+entry:
+  %ld = load <4 x i32>, <4  x i32> addrspace(1)* %in
+  %value = insertelement <4 x i32> %ld, i32 5, i32 undef
+  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
 ; CHECK-LABEL: {{^}}insert_w_offset:
 ; CHECK: s_mov_b32 m0
 ; CHECK-NEXT: v_movreld_b32_e32

Added: llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir?rev=273916&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-undef.mir Mon Jun 27 14:57:44 2016
@@ -0,0 +1,327 @@
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-lower-control-flow -o /dev/null %s 2>&1 | FileCheck %s
+# Getting an undef that is specifically a VGPR is tricky from IR
+
+# CHECK-LABEL: name: extract_undef_offset_vgpr{{$}}
+# CHECK: bb.1:
+# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
+
+# CHECK: V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
+# CHECK: %vgpr0 = V_MOVRELS_B32_e32 %vgpr0, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
+
+# CHECK: bb.2:
+# CHECK: liveins: %sgpr6_sgpr7, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr4, %sgpr5, %sgpr6, %sgpr7, %sgpr4_sgpr5, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
+
+
+--- |
+  target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+  define void @extract_undef_offset_vgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  entry:
+    %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
+    %value = extractelement <4 x i32> %ld, i32 undef
+    store i32 %value, i32 addrspace(1)* %out
+    ret void
+  }
+
+  define void @extract_undef_neg_offset_vgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  entry:
+    %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
+    %value = extractelement <4 x i32> %ld, i32 undef
+    store i32 %value, i32 addrspace(1)* %out
+    ret void
+  }
+
+  define void @insert_undef_offset_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  entry:
+    %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
+    %value = insertelement <4 x i32> %ld, i32 5, i32 undef
+    store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+    ret void
+  }
+
+  define void @insert_undef_neg_offset_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  entry:
+    %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
+    %value = insertelement <4 x i32> %ld, i32 5, i32 undef
+    store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+    ret void
+  }
+
+  define void @insert_undef_value_offset_vgpr(<4 x i32> addrspace(1)*%out, <4 x i32> addrspace(1)* %in, i32 %idx) {
+  entry:
+    %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
+    %value = insertelement <4 x i32> %ld, i32 undef, i32 %idx
+    store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+    ret void
+  }
+
+...
+---
+name:            extract_undef_offset_vgpr
+alignment:       0
+exposesReturnsTwice: false
+hasInlineAsm:    false
+allVRegsAllocated: true
+isSSA:           false
+tracksRegLiveness: true
+tracksSubRegLiveness: true
+liveins:
+  - { reg: '%sgpr0_sgpr1' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0.entry:
+    liveins: %sgpr0_sgpr1
+
+    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11
+    %sgpr7 = S_MOV_B32 61440
+    %sgpr6 = S_MOV_B32 -1
+    S_WAITCNT 127
+    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
+    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
+    S_WAITCNT 3952
+    %vgpr0, dead %sgpr0_sgpr1 = SI_INDIRECT_SRC_V4 killed %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, 0, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
+    S_WAITCNT 127
+    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
+    S_ENDPGM
+
+...
+
+# CHECK-LABEL: name: extract_undef_neg_offset_vgpr{{$}}
+# CHECK: bb.1:
+# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
+
+# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
+# CHECK: %m0 = S_MOV_B32 %vcc_lo
+# CHECK: %m0 = S_ADD_I32 %m0, -7, implicit-def %scc
+# CHECK: %vgpr0 = V_MOVRELS_B32_e32 %vgpr0, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
+
+# CHECK: bb.2:
+# CHECK: liveins: %sgpr6_sgpr7, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr4, %sgpr5, %sgpr6, %sgpr7, %sgpr4_sgpr5, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %sgpr0_sgpr1, %sgpr0, %sgpr1
+
+name:            extract_undef_neg_offset_vgpr
+alignment:       0
+exposesReturnsTwice: false
+hasInlineAsm:    false
+allVRegsAllocated: true
+isSSA:           false
+tracksRegLiveness: true
+tracksSubRegLiveness: true
+liveins:
+  - { reg: '%sgpr0_sgpr1' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0.entry:
+    liveins: %sgpr0_sgpr1
+
+    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11
+    %sgpr7 = S_MOV_B32 61440
+    %sgpr6 = S_MOV_B32 -1
+    S_WAITCNT 127
+    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
+    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
+    S_WAITCNT 3952
+    %vgpr0, dead %sgpr0_sgpr1 = SI_INDIRECT_SRC_V4 killed %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, -7, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
+    S_WAITCNT 127
+    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
+    S_ENDPGM
+
+...
+
+# CHECK-LABEL: name: insert_undef_offset_vgpr{{$}}
+# CHECK: bb.1:
+# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
+
+# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
+# CHECK: %m0 = S_MOV_B32 %vcc_lo
+# CHECK: %vgpr0 = V_MOVRELD_B32_e32 %vgpr4, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
+
+# CHECK: bb.2:
+# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1
+
+name:            insert_undef_offset_vgpr
+alignment:       0
+exposesReturnsTwice: false
+hasInlineAsm:    false
+allVRegsAllocated: true
+isSSA:           false
+tracksRegLiveness: true
+tracksSubRegLiveness: true
+liveins:
+  - { reg: '%sgpr0_sgpr1' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0.entry:
+    liveins: %sgpr0_sgpr1
+
+    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
+    %sgpr7 = S_MOV_B32 61440
+    %sgpr6 = S_MOV_B32 -1
+    %vgpr4 = V_MOV_B32_e32 5, implicit %exec
+    S_WAITCNT 127
+    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
+    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
+    S_WAITCNT 3952
+    %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, 0, killed %vgpr4, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
+    S_WAITCNT 127
+    BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
+    S_ENDPGM
+
+...
+
+# CHECK-LABEL: name: insert_undef_neg_offset_vgpr{{$}}
+# CHECK: bb.1:
+# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
+
+# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
+# CHECK: %m0 = S_MOV_B32 %vcc_lo
+# CHECK: %m0 = S_ADD_I32 %m0, -7, implicit-def %scc
+# CHECK: %vgpr0 = V_MOVRELD_B32_e32 %vgpr4, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
+
+# CHECK: bb.2:
+# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
+
+name:            insert_undef_neg_offset_vgpr
+alignment:       0
+exposesReturnsTwice: false
+hasInlineAsm:    false
+allVRegsAllocated: true
+isSSA:           false
+tracksRegLiveness: true
+tracksSubRegLiveness: true
+liveins:
+  - { reg: '%sgpr0_sgpr1' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0.entry:
+    liveins: %sgpr0_sgpr1
+
+    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
+    %sgpr7 = S_MOV_B32 61440
+    %sgpr6 = S_MOV_B32 -1
+    %vgpr4 = V_MOV_B32_e32 5, implicit %exec
+    S_WAITCNT 127
+    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
+    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
+    S_WAITCNT 3952
+    %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, -7, killed %vgpr4, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
+    S_WAITCNT 127
+    BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
+    S_ENDPGM
+
+...
+
+# CHECK-LABEL: insert_undef_value_offset_vgpr{{$}}
+# CHECK: bb.1:
+# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
+# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
+
+# CHECK: %vcc_lo = V_READFIRSTLANE_B32 %vgpr4, implicit %exec
+# CHECK: %m0 = S_MOV_B32 %vcc_lo
+# CHECK: %vgpr0 = V_MOVRELD_B32_e32 undef %vgpr10, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec
+
+# CHECK: bb.2:
+# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}
+
+name:            insert_undef_value_offset_vgpr
+alignment:       0
+exposesReturnsTwice: false
+hasInlineAsm:    false
+allVRegsAllocated: true
+isSSA:           false
+tracksRegLiveness: true
+tracksSubRegLiveness: true
+liveins:
+  - { reg: '%sgpr0_sgpr1' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0.entry:
+    liveins: %sgpr0_sgpr1
+
+    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
+    %sgpr7 = S_MOV_B32 61440
+    %sgpr6 = S_MOV_B32 -1
+    %vgpr4 = V_MOV_B32_e32 2, implicit %exec
+    S_WAITCNT 127
+    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
+    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
+    S_WAITCNT 3952
+    %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, killed %vgpr4, 0, undef %vgpr10, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
+    S_WAITCNT 127
+    BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
+    S_ENDPGM
+
+...
