[llvm] r296375 - AMDGPU: Don't fold immediate if clamp/omod are set

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 27 12:21:32 PST 2017


Author: arsenm
Date: Mon Feb 27 14:21:31 2017
New Revision: 296375

URL: http://llvm.org/viewvc/llvm-project?rev=296375&view=rev
Log:
AMDGPU: Don't fold immediate if clamp/omod are set

This doesn't fix any practical problems because clamp/omod
are currently folded after the peephole optimizer runs.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=296375&r1=296374&r2=296375&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Mon Feb 27 14:21:31 2017
@@ -1537,15 +1537,10 @@ bool SIInstrInfo::FoldImmediate(MachineI
 
   if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
       Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
-    bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
-
-    // Don't fold if we are using source modifiers. The new VOP2 instructions
-    // don't have them.
-    if (hasModifiersSet(UseMI, AMDGPU::OpName::src0_modifiers) ||
-        hasModifiersSet(UseMI, AMDGPU::OpName::src1_modifiers) ||
-        hasModifiersSet(UseMI, AMDGPU::OpName::src2_modifiers)) {
+    // Don't fold if we are using source or output modifiers. The new VOP2
+    // instructions don't have them.
+    if (hasAnyModifiersSet(UseMI))
       return false;
-    }
 
     const MachineOperand &ImmOp = DefMI.getOperand(1);
 
@@ -1558,6 +1553,7 @@ bool SIInstrInfo::FoldImmediate(MachineI
     if (isInlineConstant(UseMI, *Src0, ImmOp))
       return false;
 
+    bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
     MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
     MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
 
@@ -1944,6 +1940,14 @@ bool SIInstrInfo::hasModifiersSet(const
   return Mods && Mods->getImm();
 }
 
+bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
+  return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
+         hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
+         hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) ||
+         hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
+         hasModifiersSet(MI, AMDGPU::OpName::omod);
+}
+
 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
                                   const MachineOperand &MO,
                                   const MCOperandInfo &OpInfo) const {

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=296375&r1=296374&r2=296375&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Mon Feb 27 14:21:31 2017
@@ -607,6 +607,7 @@ public:
 
   bool hasModifiersSet(const MachineInstr &MI,
                        unsigned OpName) const;
+  bool hasAnyModifiersSet(const MachineInstr &MI) const;
 
   bool verifyInstruction(const MachineInstr &MI,
                          StringRef &ErrInfo) const override;

Added: llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir?rev=296375&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir Mon Feb 27 14:21:31 2017
@@ -0,0 +1,306 @@
+# RUN: llc -march=amdgcn -run-pass peephole-opt -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+--- |
+  define amdgpu_kernel void @no_fold_imm_madak_mac_clamp_f32() #0 {
+    ret void
+  }
+
+  define amdgpu_kernel void @no_fold_imm_madak_mac_omod_f32() #0 {
+    ret void
+  }
+
+  define amdgpu_kernel void @no_fold_imm_madak_mad_clamp_f32() #0 {
+    ret void
+  }
+
+  define amdgpu_kernel void @no_fold_imm_madak_mad_omod_f32() #0 {
+    ret void
+  }
+
+  attributes #0 = { nounwind }
+
+...
+---
+# GCN-LABEL: name: no_fold_imm_madak_mac_clamp_f32
+# GCN: %23 = V_MOV_B32_e32 1090519040, implicit %exec
+# GCN-NEXT: %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit %exec
+
+name:            no_fold_imm_madak_mac_clamp_f32
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_64 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: sreg_64_xexec }
+  - { id: 5, class: sreg_64_xexec }
+  - { id: 6, class: sreg_64_xexec }
+  - { id: 7, class: sreg_32 }
+  - { id: 8, class: sreg_32 }
+  - { id: 9, class: sreg_32_xm0 }
+  - { id: 10, class: sreg_64 }
+  - { id: 11, class: sreg_32_xm0 }
+  - { id: 12, class: sreg_32_xm0 }
+  - { id: 13, class: sgpr_64 }
+  - { id: 14, class: sgpr_128 }
+  - { id: 15, class: sreg_32_xm0 }
+  - { id: 16, class: sreg_64 }
+  - { id: 17, class: sgpr_128 }
+  - { id: 18, class: sgpr_128 }
+  - { id: 19, class: vgpr_32 }
+  - { id: 20, class: vreg_64 }
+  - { id: 21, class: vgpr_32 }
+  - { id: 22, class: vreg_64 }
+  - { id: 23, class: vgpr_32 }
+  - { id: 24, class: vgpr_32 }
+  - { id: 25, class: vgpr_32 }
+  - { id: 26, class: vreg_64 }
+  - { id: 27, class: vgpr_32 }
+  - { id: 28, class: vreg_64 }
+  - { id: 29, class: vreg_64 }
+liveins:
+  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+  - { reg: '%vgpr0', virtual-reg: '%3' }
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %27 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+    %28 = REG_SEQUENCE %3, 1, %27, 2
+    %11 = S_MOV_B32 61440
+    %12 = S_MOV_B32 0
+    %13 = REG_SEQUENCE killed %12, 1, killed %11, 2
+    %14 = REG_SEQUENCE killed %5, 17, %13, 18
+    %15 = S_MOV_B32 2
+    %29 = V_LSHL_B64 killed %28, killed %15, implicit %exec
+    %17 = REG_SEQUENCE killed %6, 17, %13, 18
+    %18 = REG_SEQUENCE killed %4, 17, %13, 18
+    %20 = COPY %29
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit %exec
+    %22 = COPY %29
+    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit %exec
+    %23 = V_MOV_B32_e32 1090519040, implicit %exec
+    %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit %exec
+    %26 = COPY %29
+    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit %exec
+    S_ENDPGM
+
+...
+---
+# GCN-LABEL: name: no_fold_imm_madak_mac_omod_f32
+# GCN: %23 = V_MOV_B32_e32 1090519040, implicit %exec
+# GCN: %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit %exec
+
+name:            no_fold_imm_madak_mac_omod_f32
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_64 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: sreg_64_xexec }
+  - { id: 5, class: sreg_64_xexec }
+  - { id: 6, class: sreg_64_xexec }
+  - { id: 7, class: sreg_32 }
+  - { id: 8, class: sreg_32 }
+  - { id: 9, class: sreg_32_xm0 }
+  - { id: 10, class: sreg_64 }
+  - { id: 11, class: sreg_32_xm0 }
+  - { id: 12, class: sreg_32_xm0 }
+  - { id: 13, class: sgpr_64 }
+  - { id: 14, class: sgpr_128 }
+  - { id: 15, class: sreg_32_xm0 }
+  - { id: 16, class: sreg_64 }
+  - { id: 17, class: sgpr_128 }
+  - { id: 18, class: sgpr_128 }
+  - { id: 19, class: vgpr_32 }
+  - { id: 20, class: vreg_64 }
+  - { id: 21, class: vgpr_32 }
+  - { id: 22, class: vreg_64 }
+  - { id: 23, class: vgpr_32 }
+  - { id: 24, class: vgpr_32 }
+  - { id: 25, class: vgpr_32 }
+  - { id: 26, class: vreg_64 }
+  - { id: 27, class: vgpr_32 }
+  - { id: 28, class: vreg_64 }
+  - { id: 29, class: vreg_64 }
+liveins:
+  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+  - { reg: '%vgpr0', virtual-reg: '%3' }
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %27 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+    %28 = REG_SEQUENCE %3, 1, %27, 2
+    %11 = S_MOV_B32 61440
+    %12 = S_MOV_B32 0
+    %13 = REG_SEQUENCE killed %12, 1, killed %11, 2
+    %14 = REG_SEQUENCE killed %5, 17, %13, 18
+    %15 = S_MOV_B32 2
+    %29 = V_LSHL_B64 killed %28, killed %15, implicit %exec
+    %17 = REG_SEQUENCE killed %6, 17, %13, 18
+    %18 = REG_SEQUENCE killed %4, 17, %13, 18
+    %20 = COPY %29
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit %exec
+    %22 = COPY %29
+    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit %exec
+    %23 = V_MOV_B32_e32 1090519040, implicit %exec
+    %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit %exec
+    %26 = COPY %29
+    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit %exec
+    S_ENDPGM
+
+...
+---
+# GCN: name: no_fold_imm_madak_mad_clamp_f32
+# GCN: %23 = V_MOV_B32_e32 1090519040, implicit %exec
+# GCN: %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit %exec
+
+name:            no_fold_imm_madak_mad_clamp_f32
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_64 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: sreg_64_xexec }
+  - { id: 5, class: sreg_64_xexec }
+  - { id: 6, class: sreg_64_xexec }
+  - { id: 7, class: sreg_32 }
+  - { id: 8, class: sreg_32 }
+  - { id: 9, class: sreg_32_xm0 }
+  - { id: 10, class: sreg_64 }
+  - { id: 11, class: sreg_32_xm0 }
+  - { id: 12, class: sreg_32_xm0 }
+  - { id: 13, class: sgpr_64 }
+  - { id: 14, class: sgpr_128 }
+  - { id: 15, class: sreg_32_xm0 }
+  - { id: 16, class: sreg_64 }
+  - { id: 17, class: sgpr_128 }
+  - { id: 18, class: sgpr_128 }
+  - { id: 19, class: vgpr_32 }
+  - { id: 20, class: vreg_64 }
+  - { id: 21, class: vgpr_32 }
+  - { id: 22, class: vreg_64 }
+  - { id: 23, class: vgpr_32 }
+  - { id: 24, class: vgpr_32 }
+  - { id: 25, class: vgpr_32 }
+  - { id: 26, class: vreg_64 }
+  - { id: 27, class: vgpr_32 }
+  - { id: 28, class: vreg_64 }
+  - { id: 29, class: vreg_64 }
+liveins:
+  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+  - { reg: '%vgpr0', virtual-reg: '%3' }
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %27 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+    %28 = REG_SEQUENCE %3, 1, %27, 2
+    %11 = S_MOV_B32 61440
+    %12 = S_MOV_B32 0
+    %13 = REG_SEQUENCE killed %12, 1, killed %11, 2
+    %14 = REG_SEQUENCE killed %5, 17, %13, 18
+    %15 = S_MOV_B32 2
+    %29 = V_LSHL_B64 killed %28, killed %15, implicit %exec
+    %17 = REG_SEQUENCE killed %6, 17, %13, 18
+    %18 = REG_SEQUENCE killed %4, 17, %13, 18
+    %20 = COPY %29
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit %exec
+    %22 = COPY %29
+    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit %exec
+    %23 = V_MOV_B32_e32 1090519040, implicit %exec
+    %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit %exec
+    %26 = COPY %29
+    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit %exec
+    S_ENDPGM
+
+...
+---
+# GCN: name: no_fold_imm_madak_mad_omod_f32
+# GCN: %23 = V_MOV_B32_e32 1090519040, implicit %exec
+# GCN: %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit %exec
+
+name:            no_fold_imm_madak_mad_omod_f32
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_64 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: sreg_64_xexec }
+  - { id: 5, class: sreg_64_xexec }
+  - { id: 6, class: sreg_64_xexec }
+  - { id: 7, class: sreg_32 }
+  - { id: 8, class: sreg_32 }
+  - { id: 9, class: sreg_32_xm0 }
+  - { id: 10, class: sreg_64 }
+  - { id: 11, class: sreg_32_xm0 }
+  - { id: 12, class: sreg_32_xm0 }
+  - { id: 13, class: sgpr_64 }
+  - { id: 14, class: sgpr_128 }
+  - { id: 15, class: sreg_32_xm0 }
+  - { id: 16, class: sreg_64 }
+  - { id: 17, class: sgpr_128 }
+  - { id: 18, class: sgpr_128 }
+  - { id: 19, class: vgpr_32 }
+  - { id: 20, class: vreg_64 }
+  - { id: 21, class: vgpr_32 }
+  - { id: 22, class: vreg_64 }
+  - { id: 23, class: vgpr_32 }
+  - { id: 24, class: vgpr_32 }
+  - { id: 25, class: vgpr_32 }
+  - { id: 26, class: vreg_64 }
+  - { id: 27, class: vgpr_32 }
+  - { id: 28, class: vreg_64 }
+  - { id: 29, class: vreg_64 }
+liveins:
+  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+  - { reg: '%vgpr0', virtual-reg: '%3' }
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %27 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+    %28 = REG_SEQUENCE %3, 1, %27, 2
+    %11 = S_MOV_B32 61440
+    %12 = S_MOV_B32 0
+    %13 = REG_SEQUENCE killed %12, 1, killed %11, 2
+    %14 = REG_SEQUENCE killed %5, 17, %13, 18
+    %15 = S_MOV_B32 2
+    %29 = V_LSHL_B64 killed %28, killed %15, implicit %exec
+    %17 = REG_SEQUENCE killed %6, 17, %13, 18
+    %18 = REG_SEQUENCE killed %4, 17, %13, 18
+    %20 = COPY %29
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit %exec
+    %22 = COPY %29
+    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit %exec
+    %23 = V_MOV_B32_e32 1090519040, implicit %exec
+    %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit %exec
+    %26 = COPY %29
+    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit %exec
+    S_ENDPGM
+
+...




More information about the llvm-commits mailing list