[llvm] r305815 - [AMDGPU] Eliminate SGPR to VGPR copy when possible

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 20 11:32:42 PDT 2017


Author: rampitec
Date: Tue Jun 20 13:32:42 2017
New Revision: 305815

URL: http://llvm.org/viewvc/llvm-project?rev=305815&view=rev
Log:
[AMDGPU] Eliminate SGPR to VGPR copy when possible

SGPRs are generally cheaper, so try to use them over VGPRs.

Differential Revision: https://reviews.llvm.org/D34130

Added:
    llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
    llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll
    llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
    llvm/trunk/test/CodeGen/AMDGPU/uint_to_fp.i64.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp?rev=305815&r1=305814&r2=305815&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp Tue Jun 20 13:32:42 2017
@@ -174,6 +174,31 @@ static bool isSGPRToVGPRCopy(const Targe
   return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
 }
 
+static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
+                                      const SIRegisterInfo *TRI,
+                                      const SIInstrInfo *TII) {
+  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  auto &Src = MI.getOperand(1);
+  unsigned DstReg = MI.getOperand(0).getReg();
+  unsigned SrcReg = Src.getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+      !TargetRegisterInfo::isVirtualRegister(DstReg))
+    return false;
+
+  for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) {
+    const auto *UseMI = MO.getParent();
+    if (UseMI == &MI)
+      continue;
+    if (MO.isDef() || UseMI->getParent() != MI.getParent() ||
+        UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END ||
+        !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src))
+      return false;
+  }
+  // Change VGPR to SGPR destination.
+  MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg)));
+  return true;
+}
+
 // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
 //
 // SGPRx = ...
@@ -214,6 +239,9 @@ static bool foldVGPRCopyIntoRegSequence(
   if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
     return false;
 
+  if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII))
+    return true;
+
   // TODO: Could have multiple extracts?
   unsigned SubReg = CopyUse.getOperand(1).getSubReg();
   if (SubReg != AMDGPU::NoSubRegister)
@@ -563,6 +591,8 @@ bool SIFixSGPRCopies::runOnMachineFuncti
             break;
           }
           TII->moveToVALU(MI);
+        } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
+          tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
         }
 
         break;

Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll?rev=305815&r1=305814&r2=305815&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll Tue Jun 20 13:32:42 2017
@@ -4,7 +4,7 @@
 declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0
 
 ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8:
-; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
 ; GCN-DAG: v_mov_b32_e32 v5, v1
 ; GCN-DAG: v_mov_b32_e32 v4, v0
 define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {

Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll?rev=305815&r1=305814&r2=305815&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll Tue Jun 20 13:32:42 2017
@@ -4,7 +4,7 @@
 declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0
 
 ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8:
-; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
 ; GCN-DAG: v_mov_b32_e32 v5, v1
 ; GCN-DAG: v_mov_b32_e32 v4, v0
 define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {

Added: llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir?rev=305815&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir Tue Jun 20 13:32:42 2017
@@ -0,0 +1,341 @@
+# RUN: llc -march=amdgcn -run-pass si-fix-sgpr-copies,si-fold-operands,dead-mi-elimination -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+# Check that the constant stays in SGPRs.
+
+# GCN-LABEL: {{^}}name: const_to_sgpr{{$}}
+# GCN:        %[[HI:[0-9]+]] = S_MOV_B32 0
+# GCN-NEXT:   %[[LO:[0-9]+]] = S_MOV_B32 1048576
+# GCN-NEXT:   %[[SGPR_PAIR:[0-9]+]] = REG_SEQUENCE killed %[[LO]], 1, killed %[[HI]], 2
+# GCN-NEXT:   V_CMP_LT_U64_e64 killed %{{[0-9]+}}, %[[SGPR_PAIR]], implicit %exec
+
+
+# GCN-LABEL: {{^}}name: const_to_sgpr_multiple_use{{$}}
+# GCN:        %[[HI:[0-9]+]] = S_MOV_B32 0
+# GCN-NEXT:   %[[LO:[0-9]+]] = S_MOV_B32 1048576
+# GCN-NEXT:   %[[SGPR_PAIR:[0-9]+]] = REG_SEQUENCE killed %[[LO]], 1, killed %[[HI]], 2
+# GCN-NEXT:   V_CMP_LT_U64_e64 killed %{{[0-9]+}}, %[[SGPR_PAIR]], implicit %exec
+# GCN-NEXT:   V_CMP_LT_U64_e64 killed %{{[0-9]+}}, %[[SGPR_PAIR]], implicit %exec
+
+# GCN-LABEL: {{^}}name: const_to_sgpr_subreg{{$}}
+# GCN:       %[[OP0:[0-9]+]] = REG_SEQUENCE killed %{{[0-9]+}}, 1, killed %{{[0-9]+}}, 2
+# GCN-NEXT:  V_CMP_LT_U32_e64 killed %[[OP0]].sub0, 12, implicit %exec
+
+--- |
+  define amdgpu_kernel void @const_to_sgpr(i32 addrspace(1)* nocapture %arg, i64 %id) {
+  bb:
+    br i1 undef, label %bb1, label %bb2
+
+  bb1:                                              ; preds = %bb
+    br label %bb2
+
+  bb2:                                              ; preds = %bb1, %bb
+    ret void
+  }
+
+  define amdgpu_kernel void @const_to_sgpr_multiple_use(i32 addrspace(1)* nocapture %arg, i64 %id1, i64 %id2) {
+  bb:
+    br i1 undef, label %bb1, label %bb2
+
+  bb1:                                              ; preds = %bb
+    br label %bb2
+
+  bb2:                                              ; preds = %bb1, %bb
+    ret void
+  }
+
+  define amdgpu_kernel void @const_to_sgpr_subreg(i32 addrspace(1)* nocapture %arg, i64 %id) {
+  bb:
+    br i1 undef, label %bb1, label %bb2
+
+  bb1:                                              ; preds = %bb
+    br label %bb2
+
+  bb2:                                              ; preds = %bb1, %bb
+    ret void
+  }
+
+...
+---
+name:            const_to_sgpr
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sreg_64 }
+  - { id: 1, class: sreg_64 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: sgpr_64 }
+  - { id: 4, class: sreg_32_xm0 }
+  - { id: 5, class: sgpr_32 }
+  - { id: 6, class: sreg_64 }
+  - { id: 7, class: sreg_64_xexec }
+  - { id: 8, class: sreg_64_xexec }
+  - { id: 9, class: sreg_32 }
+  - { id: 10, class: sreg_64 }
+  - { id: 11, class: sreg_32_xm0 }
+  - { id: 12, class: sreg_32_xm0 }
+  - { id: 13, class: sreg_32_xm0 }
+  - { id: 14, class: sreg_32_xm0 }
+  - { id: 15, class: sreg_32_xm0 }
+  - { id: 16, class: sreg_32_xm0 }
+  - { id: 17, class: sreg_64 }
+  - { id: 18, class: sreg_32_xm0 }
+  - { id: 19, class: sreg_32_xm0 }
+  - { id: 20, class: sreg_64 }
+  - { id: 21, class: sreg_64 }
+  - { id: 22, class: vreg_64 }
+  - { id: 23, class: sreg_32_xm0 }
+  - { id: 24, class: sreg_64 }
+  - { id: 25, class: sreg_32_xm0 }
+  - { id: 26, class: sreg_32_xm0 }
+  - { id: 27, class: sgpr_64 }
+  - { id: 28, class: sgpr_128 }
+  - { id: 29, class: vgpr_32 }
+  - { id: 30, class: vreg_64 }
+liveins:
+  - { reg: '%vgpr0', virtual-reg: '%2' }
+  - { reg: '%sgpr0_sgpr1', virtual-reg: '%3' }
+body:             |
+  bb.0.bb:
+    successors: %bb.1.bb1(0x40000000), %bb.2.bb2(0x40000000)
+    liveins: %vgpr0, %sgpr0_sgpr1
+
+    %3 = COPY %sgpr0_sgpr1
+    %2 = COPY %vgpr0
+    %7 = S_LOAD_DWORDX2_IMM %3, 9, 0
+    %8 = S_LOAD_DWORDX2_IMM %3, 11, 0
+    %6 = COPY %7
+    %9 = S_MOV_B32 0
+    %10 = REG_SEQUENCE %2, 1, killed %9, 2
+    %0 = COPY %10
+    %11 = COPY %10.sub0
+    %12 = COPY %10.sub1
+    %13 = COPY %8.sub0
+    %14 = COPY %8.sub1
+    %15 = S_ADD_U32 killed %11, killed %13, implicit-def %scc
+    %16 = S_ADDC_U32 killed %12, killed %14, implicit-def dead %scc, implicit %scc
+    %17 = REG_SEQUENCE killed %15, 1, killed %16, 2
+    %18 = S_MOV_B32 0
+    %19 = S_MOV_B32 1048576
+    %20 = REG_SEQUENCE killed %19, 1, killed %18, 2
+    %22 = COPY killed %20
+    %21 = V_CMP_LT_U64_e64 killed %17, %22, implicit %exec
+    %1 = SI_IF killed %21, %bb.2.bb2, implicit-def dead %exec, implicit-def dead %scc, implicit %exec
+    S_BRANCH %bb.1.bb1
+
+  bb.1.bb1:
+    successors: %bb.2.bb2(0x80000000)
+
+    %23 = S_MOV_B32 2
+    %24 = S_LSHL_B64 %0, killed %23, implicit-def dead %scc
+    %25 = S_MOV_B32 61440
+    %26 = S_MOV_B32 0
+    %27 = REG_SEQUENCE killed %26, 1, killed %25, 2
+    %28 = REG_SEQUENCE %6, 17, killed %27, 18
+    %29 = V_MOV_B32_e32 0, implicit %exec
+    %30 = COPY %24
+    BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, implicit %exec
+
+  bb.2.bb2:
+    SI_END_CF %1, implicit-def dead %exec, implicit-def dead %scc, implicit %exec
+    S_ENDPGM
+
+...
+---
+name:            const_to_sgpr_multiple_use
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sreg_64 }
+  - { id: 1, class: sreg_64 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: sgpr_64 }
+  - { id: 4, class: sreg_32_xm0 }
+  - { id: 5, class: sgpr_32 }
+  - { id: 6, class: sreg_64 }
+  - { id: 7, class: sreg_64_xexec }
+  - { id: 8, class: sreg_64_xexec }
+  - { id: 9, class: sreg_64_xexec }
+  - { id: 10, class: sreg_32 }
+  - { id: 11, class: sreg_64 }
+  - { id: 12, class: sreg_32_xm0 }
+  - { id: 13, class: sreg_32_xm0 }
+  - { id: 14, class: sreg_32_xm0 }
+  - { id: 15, class: sreg_32_xm0 }
+  - { id: 16, class: sreg_32_xm0 }
+  - { id: 17, class: sreg_32_xm0 }
+  - { id: 18, class: sreg_64 }
+  - { id: 19, class: sreg_32_xm0 }
+  - { id: 20, class: sreg_32_xm0 }
+  - { id: 21, class: sreg_32_xm0 }
+  - { id: 22, class: sreg_32_xm0 }
+  - { id: 23, class: sreg_64 }
+  - { id: 24, class: sreg_32_xm0 }
+  - { id: 25, class: sreg_32_xm0 }
+  - { id: 26, class: sreg_64 }
+  - { id: 27, class: sreg_64 }
+  - { id: 28, class: vreg_64 }
+  - { id: 29, class: sreg_64 }
+  - { id: 30, class: vreg_64 }
+  - { id: 31, class: sreg_64 }
+  - { id: 32, class: sreg_32_xm0 }
+  - { id: 33, class: sreg_64 }
+  - { id: 34, class: sreg_32_xm0 }
+  - { id: 35, class: sreg_32_xm0 }
+  - { id: 36, class: sgpr_64 }
+  - { id: 37, class: sgpr_128 }
+  - { id: 38, class: vgpr_32 }
+  - { id: 39, class: vreg_64 }
+liveins:
+  - { reg: '%vgpr0', virtual-reg: '%2' }
+  - { reg: '%sgpr0_sgpr1', virtual-reg: '%3' }
+body:             |
+  bb.0.bb:
+    successors: %bb.1.bb1(0x40000000), %bb.2.bb2(0x40000000)
+    liveins: %vgpr0, %sgpr0_sgpr1
+
+    %3 = COPY %sgpr0_sgpr1
+    %2 = COPY %vgpr0
+    %7 = S_LOAD_DWORDX2_IMM %3, 9, 0
+    %8 = S_LOAD_DWORDX2_IMM %3, 11, 0
+    %9 = S_LOAD_DWORDX2_IMM %3, 13, 0
+    %6 = COPY %7
+    %10 = S_MOV_B32 0
+    %11 = REG_SEQUENCE %2, 1, killed %10, 2
+    %0 = COPY %11
+    %12 = COPY %11.sub0
+    %13 = COPY %11.sub1
+    %14 = COPY %8.sub0
+    %15 = COPY %8.sub1
+    %16 = S_ADD_U32 %12, killed %14, implicit-def %scc
+    %17 = S_ADDC_U32 %13, killed %15, implicit-def dead %scc, implicit %scc
+    %18 = REG_SEQUENCE killed %16, 1, killed %17, 2
+    %19 = COPY %9.sub0
+    %20 = COPY %9.sub1
+    %21 = S_ADD_U32 %12, killed %19, implicit-def %scc
+    %22 = S_ADDC_U32 %13, killed %20, implicit-def dead %scc, implicit %scc
+    %23 = REG_SEQUENCE killed %21, 1, killed %22, 2
+    %24 = S_MOV_B32 0
+    %25 = S_MOV_B32 1048576
+    %26 = REG_SEQUENCE killed %25, 1, killed %24, 2
+    %28 = COPY %26
+    %27 = V_CMP_LT_U64_e64 killed %18, %28, implicit %exec
+    %29 = V_CMP_LT_U64_e64 killed %23, %28, implicit %exec
+    %31 = S_AND_B64 killed %27, killed %29, implicit-def dead %scc
+    %1 = SI_IF killed %31, %bb.2.bb2, implicit-def dead %exec, implicit-def dead %scc, implicit %exec
+    S_BRANCH %bb.1.bb1
+
+  bb.1.bb1:
+    successors: %bb.2.bb2(0x80000000)
+
+    %32 = S_MOV_B32 2
+    %33 = S_LSHL_B64 %0, killed %32, implicit-def dead %scc
+    %34 = S_MOV_B32 61440
+    %35 = S_MOV_B32 0
+    %36 = REG_SEQUENCE killed %35, 1, killed %34, 2
+    %37 = REG_SEQUENCE %6, 17, killed %36, 18
+    %38 = V_MOV_B32_e32 0, implicit %exec
+    %39 = COPY %33
+    BUFFER_STORE_DWORD_ADDR64 killed %38, killed %39, killed %37, 0, 0, 0, 0, 0, implicit %exec
+
+  bb.2.bb2:
+    SI_END_CF %1, implicit-def dead %exec, implicit-def dead %scc, implicit %exec
+    S_ENDPGM
+
+...
+---
+name:            const_to_sgpr_subreg
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sreg_64 }
+  - { id: 1, class: sreg_64 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: sgpr_64 }
+  - { id: 4, class: sreg_32_xm0 }
+  - { id: 5, class: sgpr_32 }
+  - { id: 6, class: sreg_64 }
+  - { id: 7, class: sreg_64_xexec }
+  - { id: 8, class: sreg_64_xexec }
+  - { id: 9, class: sreg_32 }
+  - { id: 10, class: sreg_64 }
+  - { id: 11, class: sreg_32_xm0 }
+  - { id: 12, class: sreg_32_xm0 }
+  - { id: 13, class: sreg_32_xm0 }
+  - { id: 14, class: sreg_32_xm0 }
+  - { id: 15, class: sreg_32_xm0 }
+  - { id: 16, class: sreg_32_xm0 }
+  - { id: 17, class: sreg_64 }
+  - { id: 18, class: sreg_32_xm0 }
+  - { id: 19, class: sreg_32_xm0 }
+  - { id: 20, class: sreg_64 }
+  - { id: 21, class: sreg_64 }
+  - { id: 22, class: vgpr_32 }
+  - { id: 23, class: sreg_32_xm0 }
+  - { id: 24, class: sreg_64 }
+  - { id: 25, class: sreg_32_xm0 }
+  - { id: 26, class: sreg_32_xm0 }
+  - { id: 27, class: sgpr_64 }
+  - { id: 28, class: sgpr_128 }
+  - { id: 29, class: vgpr_32 }
+  - { id: 30, class: vreg_64 }
+liveins:
+  - { reg: '%vgpr0', virtual-reg: '%2' }
+  - { reg: '%sgpr0_sgpr1', virtual-reg: '%3' }
+body:             |
+  bb.0.bb:
+    successors: %bb.1.bb1(0x40000000), %bb.2.bb2(0x40000000)
+    liveins: %vgpr0, %sgpr0_sgpr1
+
+    %3 = COPY %sgpr0_sgpr1
+    %2 = COPY %vgpr0
+    %7 = S_LOAD_DWORDX2_IMM %3, 9, 0
+    %8 = S_LOAD_DWORDX2_IMM %3, 11, 0
+    %6 = COPY %7
+    %9 = S_MOV_B32 0
+    %10 = REG_SEQUENCE %2, 1, killed %9, 2
+    %0 = COPY %10
+    %11 = COPY %10.sub0
+    %12 = COPY %10.sub1
+    %13 = COPY %8.sub0
+    %14 = COPY %8.sub1
+    %15 = S_ADD_U32 killed %11, killed %13, implicit-def %scc
+    %16 = S_ADDC_U32 killed %12, killed %14, implicit-def dead %scc, implicit %scc
+    %17 = REG_SEQUENCE killed %15, 1, killed %16, 2
+    %18 = S_MOV_B32 12
+    %19 = S_MOV_B32 1048576
+    %20 = REG_SEQUENCE killed %19, 1, killed %18, 2
+    %22 = COPY killed %20.sub1
+    %21 = V_CMP_LT_U32_e64 killed %17.sub0, %22, implicit %exec
+    %1 = SI_IF killed %21, %bb.2.bb2, implicit-def dead %exec, implicit-def dead %scc, implicit %exec
+    S_BRANCH %bb.1.bb1
+
+  bb.1.bb1:
+    successors: %bb.2.bb2(0x80000000)
+
+    %23 = S_MOV_B32 2
+    %24 = S_LSHL_B64 %0, killed %23, implicit-def dead %scc
+    %25 = S_MOV_B32 61440
+    %26 = S_MOV_B32 0
+    %27 = REG_SEQUENCE killed %26, 1, killed %25, 2
+    %28 = REG_SEQUENCE %6, 17, killed %27, 18
+    %29 = V_MOV_B32_e32 0, implicit %exec
+    %30 = COPY %24
+    BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, implicit %exec
+
+  bb.2.bb2:
+    SI_END_CF %1, implicit-def dead %exec, implicit-def dead %scc, implicit %exec
+    S_ENDPGM
+
+...

Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll?rev=305815&r1=305814&r2=305815&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll Tue Jun 20 13:32:42 2017
@@ -400,9 +400,9 @@ store_label:
 
 ; Check that "pulling out" SDWA operands works correctly.
 ; GCN-LABEL: {{^}}pulled_out_test:
-; NOSDWA-DAG: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; NOSDWA-DAG: v_and_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
 ; NOSDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
-; NOSDWA-DAG: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; NOSDWA-DAG: v_and_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
 ; NOSDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
 ; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; NOSDWA-NOT: v_and_b32_sdwa

Modified: llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.i64.ll?rev=305815&r1=305814&r2=305815&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.i64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.i64.ll Tue Jun 20 13:32:42 2017
@@ -22,7 +22,7 @@ define amdgpu_kernel void @s_sint_to_fp_
 ; GCN: v_cndmask
 
 ; GCN-DAG: v_cmp_eq_u64
-; GCN-DAG: v_cmp_lt_u64
+; GCN-DAG: v_cmp_gt_u64
 
 ; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
 ; GCN: v_cndmask_b32_e{{32|64}} [[SIGN_SEL:v[0-9]+]],
@@ -57,7 +57,7 @@ define amdgpu_kernel void @s_sint_to_fp_
 ; GCN: v_cndmask
 
 ; GCN-DAG: v_cmp_eq_u64
-; GCN-DAG: v_cmp_lt_u64
+; GCN-DAG: v_cmp_gt_u64
 
 ; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
 ; GCN: v_cndmask_b32_e{{32|64}} [[SIGN_SEL:v[0-9]+]],

Modified: llvm/trunk/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uint_to_fp.i64.ll?rev=305815&r1=305814&r2=305815&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uint_to_fp.i64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/uint_to_fp.i64.ll Tue Jun 20 13:32:42 2017
@@ -19,7 +19,7 @@ define amdgpu_kernel void @s_uint_to_fp_
 ; GCN: v_cndmask
 
 ; GCN-DAG: v_cmp_eq_u64
-; GCN-DAG: v_cmp_lt_u64
+; GCN-DAG: v_cmp_gt_u64
 
 ; GCN: v_add_i32_e32 [[VR:v[0-9]+]]
 ; GCN: v_cvt_f16_f32_e32 [[VR_F16:v[0-9]+]], [[VR]]
@@ -50,7 +50,7 @@ define amdgpu_kernel void @s_uint_to_fp_
 ; GCN: v_cndmask
 
 ; GCN-DAG: v_cmp_eq_u64
-; GCN-DAG: v_cmp_lt_u64
+; GCN-DAG: v_cmp_gt_u64
 
 ; GCN: v_add_i32_e32 [[VR:v[0-9]+]]
 ; GCN: {{buffer|flat}}_store_dword {{.*}}[[VR]]




More information about the llvm-commits mailing list