[llvm] 9ef166e - [AMDGPU] Fix FoldImmediate for 16 bit operand
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue May 5 10:19:25 PDT 2020
Author: Stanislav Mekhanoshin
Date: 2020-05-05T10:19:14-07:00
New Revision: 9ef166e65748cdb3b8c7c74a51113a58f2a7753d
URL: https://github.com/llvm/llvm-project/commit/9ef166e65748cdb3b8c7c74a51113a58f2a7753d
DIFF: https://github.com/llvm/llvm-project/commit/9ef166e65748cdb3b8c7c74a51113a58f2a7753d.diff
LOG: [AMDGPU] Fix FoldImmediate for 16 bit operand
Differential Revision: https://reviews.llvm.org/D79362
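The COPY case of FoldImmediate now also handles COPYs that read a 16-bit sub-register of the 32-bit move-immediate's result: when the source sub-register is hi16, the immediate is shifted right by 16 before being folded. Below is a minimal standalone sketch of that adjustment (illustrative names, not the in-tree API); the inputs and outputs are taken from the new MIR tests.

#include <cstdint>
#include <iostream>

// Which 16-bit half of the defining 32-bit register the COPY reads.
enum class SrcSubReg { None, Lo16, Hi16 };

// Sketch of the immediate adjustment: reading hi16 folds the upper half,
// mirroring the patch's APInt::ashr(16); lo16 needs no adjustment.
int64_t foldThroughSubReg(int32_t Imm, SrcSubReg Src) {
  if (Src == SrcSubReg::Hi16)
    return Imm >> 16;  // arithmetic shift, like APInt::ashr
  return Imm;
}

int main() {
  std::cout << foldThroughSubReg(2048, SrcSubReg::Hi16) << '\n';       // 0
  std::cout << foldThroughSubReg(134217728, SrcSubReg::Hi16) << '\n';  // 2048
  std::cout << foldThroughSubReg(65536, SrcSubReg::Hi16) << '\n';      // 1
}

These three values correspond to the immediates checked in the hi_to_lo tests added below (fold_simm_16_sub_to_sub_hi_to_lo_2048, fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048, and fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1).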
Added:
llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e0b23f2aafd3..b00dc72d73ce 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2509,15 +2509,41 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned Opc = UseMI.getOpcode();
if (Opc == AMDGPU::COPY) {
- bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
+ Register DstReg = UseMI.getOperand(0).getReg();
+ Register SrcReg = UseMI.getOperand(1).getReg();
+ bool Is16Bit = getOpSize(UseMI, 0) == 2;
+ bool isVGPRCopy = RI.isVGPR(*MRI, DstReg);
unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
- if (RI.isAGPR(*MRI, UseMI.getOperand(0).getReg())) {
- if (!isInlineConstant(*ImmOp, AMDGPU::OPERAND_REG_INLINE_AC_INT32))
+ APInt Imm(32, ImmOp->getImm());
+
+ if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16)
+ Imm = Imm.ashr(16);
+
+ if (RI.isAGPR(*MRI, DstReg)) {
+ if (!isInlineConstant(Imm))
return false;
NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32;
}
+
+ if (Is16Bit) {
+ if (isVGPRCopy)
+ return false; // Do not clobber vgpr_hi16
+
+ if (DstReg.isVirtual() &&
+ UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
+ return false;
+
+ UseMI.getOperand(0).setSubReg(0);
+ if (DstReg.isPhysical()) {
+ DstReg = RI.get32BitRegister(DstReg);
+ UseMI.getOperand(0).setReg(DstReg);
+ }
+ assert(SrcReg.isVirtual());
+ }
+
UseMI.setDesc(get(NewOpc));
- UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
+ UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
+ UseMI.getOperand(1).setTargetFlags(0);
UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
return true;
}
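For readers who want the new 16-bit legality rules without the diff context, a condensed restatement follows (simplified, illustrative names, not the in-tree code): the fold is rejected for VGPR destinations because rewriting the COPY into a full 32-bit V_MOV_B32 would clobber vgpr_hi16, AGPR destinations need an inline constant, and virtual destinations must write the lo16 sub-register.

// Condensed sketch of the 16-bit fold decision; names are illustrative.
enum class RegBank { SGPR, VGPR, AGPR };

struct FoldDecision {
  bool Legal;
  const char *NewOpc;  // mnemonic of the replacement move-immediate
};

FoldDecision decide16BitFold(RegBank DstBank, bool DstIsVirtual,
                             bool DstSubRegIsLo16, bool ImmIsInlinable) {
  if (DstBank == RegBank::VGPR)
    return {false, nullptr};  // would clobber vgpr_hi16
  if (DstBank == RegBank::AGPR && !ImmIsInlinable)
    return {false, nullptr};  // AGPR writes require an inline constant
  if (DstIsVirtual && !DstSubRegIsLo16)
    return {false, nullptr};  // only lo16 writes of virtual registers fold
  // Physical 16-bit destinations are widened to their 32-bit
  // super-register (get32BitRegister) before the COPY is rewritten.
  return {true, DstBank == RegBank::AGPR ? "V_ACCVGPR_WRITE_B32"
                                         : "S_MOV_B32"};
}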
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 9fcc5caf7dfd..8231a96f5f6b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -827,11 +827,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
const MachineOperand &MO = MI.getOperand(OpNo);
if (MO.isReg()) {
if (unsigned SubReg = MO.getSubReg()) {
- assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(
- MI.getParent()->getParent()->getRegInfo().
- getRegClass(MO.getReg()), SubReg)) >= 32 &&
- "Sub-dword subregs are not supported");
- return RI.getNumChannelsFromSubReg(SubReg) * 4;
+ return RI.getSubRegIdxSize(SubReg) / 8;
}
}
return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
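With getOpSize deriving the size of a sub-register operand from the sub-register index itself (getSubRegIdxSize reports bits), 16-bit indices such as lo16 and hi16 now report 2 bytes instead of tripping the removed sub-dword assertion; this is what the Is16Bit check above relies on. A trivial sketch of the computation, assuming the index size in bits is already known:

#include <cassert>

// Operand size in bytes for a sub-register operand: the sub-register
// index's size in bits divided by 8.
unsigned opSizeInBytes(unsigned SubRegIdxSizeInBits) {
  return SubRegIdxSizeInBits / 8;
}

int main() {
  assert(opSizeInBytes(16) == 2);  // lo16 / hi16
  assert(opSizeInBytes(32) == 4);  // a 32-bit sub-register such as sub0
}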
diff --git a/llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir b/llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir
new file mode 100644
index 000000000000..458bdcef1a58
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir
@@ -0,0 +1,257 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: fold_simm_16_sub_to_lo
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_simm_16_sub_to_lo
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN: [[COPY:%[0-9]+]]:sgpr_lo16 = COPY killed [[S_MOV_B32_]].lo16
+ ; GCN: SI_RETURN_TO_EPILOG [[COPY]]
+ %0:sreg_32 = S_MOV_B32 2048
+ %1:sgpr_lo16 = COPY killed %0.lo16
+ SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name: fold_simm_16_sub_to_sub
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_simm_16_sub_to_sub
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
+ %0:sreg_32 = S_MOV_B32 2048
+ %1.lo16:sreg_32 = COPY killed %0.lo16
+ SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name: fold_simm_16_sub_to_phys
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_simm_16_sub_to_phys
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN: $sgpr0 = S_MOV_B32 2048
+ ; GCN: SI_RETURN_TO_EPILOG $sgpr0_lo16
+ %0:sreg_32 = S_MOV_B32 2048
+ $sgpr0_lo16 = COPY killed %0.lo16
+ SI_RETURN_TO_EPILOG $sgpr0_lo16
+
+...
+
+---
+name: fold_aimm_16_sub_to_sub_2048
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_2048
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].lo16
+ ; GCN: SI_RETURN_TO_EPILOG %1
+ %0:sreg_32 = S_MOV_B32 2048
+ %1.lo16:agpr_32 = COPY killed %0.lo16
+ SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name: fold_aimm_16_sub_to_sub_0
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_0
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec
+ ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
+ %0:sreg_32 = S_MOV_B32 0
+ %1.lo16:agpr_32 = COPY killed %0.lo16
+ SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name: fold_aimm_16_sub_to_phys
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_aimm_16_sub_to_phys
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 0, implicit $exec
+ ; GCN: SI_RETURN_TO_EPILOG $agpr0_lo16
+ %0:sreg_32 = S_MOV_B32 0
+ $agpr0_lo16 = COPY killed %0.lo16
+ SI_RETURN_TO_EPILOG $agpr0_lo16
+
+...
+
+---
+name: fold_vimm_16_sub_to_lo
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_vimm_16_sub_to_lo
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_lo16 = COPY killed [[S_MOV_B32_]].lo16
+ ; GCN: SI_RETURN_TO_EPILOG [[COPY]]
+ %0:sreg_32 = S_MOV_B32 2048
+ %1:vgpr_lo16 = COPY killed %0.lo16
+ SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name: fold_vimm_16_sub_to_sub
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_vimm_16_sub_to_sub
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16
+ ; GCN: SI_RETURN_TO_EPILOG %1
+ %0:sreg_32 = S_MOV_B32 2048
+ %1.lo16:vgpr_32 = COPY killed %0.lo16
+ SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name: fold_vimm_16_sub_to_phys
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_vimm_16_sub_to_phys
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN: $vgpr0_lo16 = COPY killed [[S_MOV_B32_]].lo16
+ ; GCN: SI_RETURN_TO_EPILOG $vgpr0_lo16
+ %0:sreg_32 = S_MOV_B32 2048
+ $vgpr0_lo16 = COPY killed %0.lo16
+ SI_RETURN_TO_EPILOG $vgpr0_lo16
+
+...
+
+---
+name: fold_vimm_16_lo_to_hi
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_vimm_16_lo_to_hi
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN: %1.hi16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16
+ ; GCN: SI_RETURN_TO_EPILOG %1
+ %0:sreg_32 = S_MOV_B32 2048
+ %1.hi16:vgpr_32 = COPY killed %0.lo16
+ SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name: fold_vimm_16_hi_to_lo
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_vimm_16_hi_to_lo
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].hi16
+ ; GCN: SI_RETURN_TO_EPILOG %1
+ %0:sreg_32 = S_MOV_B32 2048
+ %1.lo16:vgpr_32 = COPY killed %0.hi16
+ SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name: fold_simm_16_sub_to_sub_lo_to_hi
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_simm_16_sub_to_sub_lo_to_hi
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN: %1.hi16:sreg_32 = COPY killed [[S_MOV_B32_]].lo16
+ ; GCN: SI_RETURN_TO_EPILOG %1
+ %0:sreg_32 = S_MOV_B32 2048
+ %1.hi16:sreg_32 = COPY killed %0.lo16
+ SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name: fold_simm_16_sub_to_sub_hi_to_lo_2048
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_2048
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
+ %0:sreg_32 = S_MOV_B32 2048
+ %1.lo16:sreg_32 = COPY killed %0.hi16
+ SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728
+ ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
+ %0:sreg_32 = S_MOV_B32 134217728
+ %1.lo16:sreg_32 = COPY killed %0.hi16
+ SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name: fold_aimm_16_sub_to_sub_hi_to_lo_2048
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_2048
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec
+ ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
+ %0:sreg_32 = S_MOV_B32 2048
+ %1.lo16:agpr_32 = COPY killed %0.hi16
+ SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65536
+ ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 1, implicit $exec
+ ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
+ %0:sreg_32 = S_MOV_B32 65536
+ %1.lo16:agpr_32 = COPY killed %0.hi16
+ SI_RETURN_TO_EPILOG %1
+
+...
+
+---
+name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728
+ ; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].hi16
+ ; GCN: SI_RETURN_TO_EPILOG %1
+ %0:sreg_32 = S_MOV_B32 134217728
+ %1.lo16:agpr_32 = COPY killed %0.hi16
+ SI_RETURN_TO_EPILOG %1
+
+...