[llvm] 9ef166e - [AMDGPU] Fix FoldImmediate for 16 bit operand

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Tue May 5 10:19:25 PDT 2020


Author: Stanislav Mekhanoshin
Date: 2020-05-05T10:19:14-07:00
New Revision: 9ef166e65748cdb3b8c7c74a51113a58f2a7753d

URL: https://github.com/llvm/llvm-project/commit/9ef166e65748cdb3b8c7c74a51113a58f2a7753d
DIFF: https://github.com/llvm/llvm-project/commit/9ef166e65748cdb3b8c7c74a51113a58f2a7753d.diff

LOG: [AMDGPU] Fix FoldImmediate for 16 bit operand

Differential Revision: https://reviews.llvm.org/D79362

Added: 
    llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e0b23f2aafd3..b00dc72d73ce 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2509,15 +2509,41 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
 
   unsigned Opc = UseMI.getOpcode();
   if (Opc == AMDGPU::COPY) {
-    bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
+    Register DstReg = UseMI.getOperand(0).getReg();
+    Register SrcReg = UseMI.getOperand(1).getReg();
+    bool Is16Bit = getOpSize(UseMI, 0) == 2;
+    bool isVGPRCopy = RI.isVGPR(*MRI, DstReg);
     unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
-    if (RI.isAGPR(*MRI, UseMI.getOperand(0).getReg())) {
-      if (!isInlineConstant(*ImmOp, AMDGPU::OPERAND_REG_INLINE_AC_INT32))
+    APInt Imm(32, ImmOp->getImm());
+
+    if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16)
+      Imm = Imm.ashr(16);
+
+    if (RI.isAGPR(*MRI, DstReg)) {
+      if (!isInlineConstant(Imm))
         return false;
       NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32;
     }
+
+    if (Is16Bit) {
+       if (isVGPRCopy)
+         return false; // Do not clobber vgpr_hi16
+
+       if (DstReg.isVirtual() &&
+           UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
+         return false;
+
+      UseMI.getOperand(0).setSubReg(0);
+      if (DstReg.isPhysical()) {
+        DstReg = RI.get32BitRegister(DstReg);
+        UseMI.getOperand(0).setReg(DstReg);
+      }
+      assert(SrcReg.isVirtual());
+    }
+
     UseMI.setDesc(get(NewOpc));
-    UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
+    UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
+    UseMI.getOperand(1).setTargetFlags(0);
     UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
     return true;
   }

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 9fcc5caf7dfd..8231a96f5f6b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -827,11 +827,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     const MachineOperand &MO = MI.getOperand(OpNo);
     if (MO.isReg()) {
       if (unsigned SubReg = MO.getSubReg()) {
-        assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(
-                                   MI.getParent()->getParent()->getRegInfo().
-                                     getRegClass(MO.getReg()), SubReg)) >= 32 &&
-               "Sub-dword subregs are not supported");
-        return RI.getNumChannelsFromSubReg(SubReg) * 4;
+        return RI.getSubRegIdxSize(SubReg) / 8;
       }
     }
     return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;

diff  --git a/llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir b/llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir
new file mode 100644
index 000000000000..458bdcef1a58
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir
@@ -0,0 +1,257 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name:            fold_simm_16_sub_to_lo
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_simm_16_sub_to_lo
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+    ; GCN: [[COPY:%[0-9]+]]:sgpr_lo16 = COPY killed [[S_MOV_B32_]].lo16
+    ; GCN: SI_RETURN_TO_EPILOG [[COPY]]
+    %0:sreg_32 = S_MOV_B32 2048
+    %1:sgpr_lo16 = COPY killed %0.lo16
+    SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name:            fold_simm_16_sub_to_sub
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_simm_16_sub_to_sub
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+    ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+    ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
+    %0:sreg_32 = S_MOV_B32 2048
+    %1.lo16:sreg_32 = COPY killed %0.lo16
+    SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name:            fold_simm_16_sub_to_phys
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_simm_16_sub_to_phys
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+    ; GCN: $sgpr0 = S_MOV_B32 2048
+    ; GCN: SI_RETURN_TO_EPILOG $sgpr0_lo16
+    %0:sreg_32 = S_MOV_B32 2048
+    $sgpr0_lo16 = COPY killed %0.lo16
+    SI_RETURN_TO_EPILOG $sgpr0_lo16
+
+...
+
+---
+name:            fold_aimm_16_sub_to_sub_2048
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_2048
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+    ; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].lo16
+    ; GCN: SI_RETURN_TO_EPILOG %1
+    %0:sreg_32 = S_MOV_B32 2048
+    %1.lo16:agpr_32 = COPY killed %0.lo16
+    SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name:            fold_aimm_16_sub_to_sub_0
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_0
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec
+    ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
+    %0:sreg_32 = S_MOV_B32 0
+    %1.lo16:agpr_32 = COPY killed %0.lo16
+    SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name:            fold_aimm_16_sub_to_phys
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_aimm_16_sub_to_phys
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 0, implicit $exec
+    ; GCN: SI_RETURN_TO_EPILOG $agpr0_lo16
+    %0:sreg_32 = S_MOV_B32 0
+    $agpr0_lo16 = COPY killed %0.lo16
+    SI_RETURN_TO_EPILOG $agpr0_lo16
+
+...
+
+---
+name:            fold_vimm_16_sub_to_lo
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_vimm_16_sub_to_lo
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_lo16 = COPY killed [[S_MOV_B32_]].lo16
+    ; GCN: SI_RETURN_TO_EPILOG [[COPY]]
+    %0:sreg_32 = S_MOV_B32 2048
+    %1:vgpr_lo16 = COPY killed %0.lo16
+    SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name:            fold_vimm_16_sub_to_sub
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_vimm_16_sub_to_sub
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+    ; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16
+    ; GCN: SI_RETURN_TO_EPILOG %1
+    %0:sreg_32 = S_MOV_B32 2048
+    %1.lo16:vgpr_32 = COPY killed %0.lo16
+    SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name:            fold_vimm_16_sub_to_phys
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_vimm_16_sub_to_phys
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+    ; GCN: $vgpr0_lo16 = COPY killed [[S_MOV_B32_]].lo16
+    ; GCN: SI_RETURN_TO_EPILOG $vgpr0_lo16
+    %0:sreg_32 = S_MOV_B32 2048
+    $vgpr0_lo16 = COPY killed %0.lo16
+    SI_RETURN_TO_EPILOG $vgpr0_lo16
+
+...
+
+---
+name:            fold_vimm_16_lo_to_hi
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_vimm_16_lo_to_hi
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+    ; GCN: %1.hi16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16
+    ; GCN: SI_RETURN_TO_EPILOG %1
+    %0:sreg_32 = S_MOV_B32 2048
+    %1.hi16:vgpr_32 = COPY killed %0.lo16
+    SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name:            fold_vimm_16_hi_to_lo
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_vimm_16_hi_to_lo
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+    ; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].hi16
+    ; GCN: SI_RETURN_TO_EPILOG %1
+    %0:sreg_32 = S_MOV_B32 2048
+    %1.lo16:vgpr_32 = COPY killed %0.hi16
+    SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name:            fold_simm_16_sub_to_sub_lo_to_hi
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_simm_16_sub_to_sub_lo_to_hi
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+    ; GCN: %1.hi16:sreg_32 = COPY killed [[S_MOV_B32_]].lo16
+    ; GCN: SI_RETURN_TO_EPILOG %1
+    %0:sreg_32 = S_MOV_B32 2048
+    %1.hi16:sreg_32 = COPY killed %0.lo16
+    SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name:            fold_simm_16_sub_to_sub_hi_to_lo_2048
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_2048
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+    ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
+    %0:sreg_32 = S_MOV_B32 2048
+    %1.lo16:sreg_32 = COPY killed %0.hi16
+    SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name:            fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728
+    ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+    ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
+    %0:sreg_32 = S_MOV_B32 134217728
+    %1.lo16:sreg_32 = COPY killed %0.hi16
+    SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name:            fold_aimm_16_sub_to_sub_hi_to_lo_2048
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_2048
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+    ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec
+    ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
+    %0:sreg_32 = S_MOV_B32 2048
+    %1.lo16:agpr_32 = COPY killed %0.hi16
+    SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name:            fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65536
+    ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 1, implicit $exec
+    ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
+    %0:sreg_32 = S_MOV_B32 65536
+    %1.lo16:agpr_32 = COPY killed %0.hi16
+    SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name:            fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728
+    ; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].hi16
+    ; GCN: SI_RETURN_TO_EPILOG %1
+    %0:sreg_32 = S_MOV_B32 134217728
+    %1.lo16:agpr_32 = COPY killed %0.hi16
+    SI_RETURN_TO_EPILOG %1
+
+...


        


More information about the llvm-commits mailing list