[llvm] r364808 - AMDGPU/GlobalISel: RegBankSelect for amdgcn.writelane
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 1 09:41:36 PDT 2019
Author: arsenm
Date: Mon Jul 1 09:41:36 2019
New Revision: 364808
URL: http://llvm.org/viewvc/llvm-project?rev=364808&view=rev
Log:
AMDGPU/GlobalISel: RegBankSelect for amdgcn.writelane
Added:
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp?rev=364808&r1=364807&r2=364808&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Mon Jul 1 09:41:36 2019
@@ -158,7 +158,25 @@ AMDGPURegisterBankInfo::getInstrAlternat
const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
}
+ case Intrinsic::amdgcn_writelane: {
+ static const OpRegBankEntry<4> Table[4] = {
+ // Perfectly legal.
+ { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
+
+ // Need readfirstlane of first op
+ { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
+
+ // Need readfirstlane of second op
+ { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
+ // Need readfirstlane of both ops
+ { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
+ };
+
+ // rsrc, voffset, offset
+ const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
+ return addMappingFromTable<4>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+ }
default:
return RegisterBankInfo::getInstrAlternativeMappings(MI);
}
@@ -764,6 +782,17 @@ void AMDGPURegisterBankInfo::constrainOp
MI.getOperand(OpIdx).setReg(SGPR);
}
+// For cases where only a single copy is inserted for matching register banks.
+// Replace the register in the instruction operand
+static void substituteSimpleCopyRegs(
+ const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx) {
+ SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(OpIdx));
+ if (!SrcReg.empty()) {
+ assert(SrcReg.size() == 1);
+ OpdMapper.getMI().getOperand(OpIdx).setReg(SrcReg[0]);
+ }
+}
+
void AMDGPURegisterBankInfo::applyMappingImpl(
const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
@@ -982,16 +1011,23 @@ void AMDGPURegisterBankInfo::applyMappin
return;
}
case Intrinsic::amdgcn_readlane: {
- SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(2));
+ substituteSimpleCopyRegs(OpdMapper, 2);
- if (!SrcReg.empty()) {
- assert(SrcReg.size() == 1);
- MI.getOperand(2).setReg(SrcReg[0]);
- }
+ assert(empty(OpdMapper.getVRegs(0)));
+ assert(empty(OpdMapper.getVRegs(3)));
+ // Make sure the index is an SGPR. It doesn't make sense to run this in a
+ // waterfall loop, so assume it's a uniform value.
+ constrainOpWithReadfirstlane(MI, MRI, 3); // Index
+ return;
+ }
+ case Intrinsic::amdgcn_writelane: {
assert(empty(OpdMapper.getVRegs(0)));
+ assert(empty(OpdMapper.getVRegs(2)));
assert(empty(OpdMapper.getVRegs(3)));
+ substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val
+ constrainOpWithReadfirstlane(MI, MRI, 2); // Source value
constrainOpWithReadfirstlane(MI, MRI, 3); // Index
return;
}
@@ -1664,6 +1700,23 @@ AMDGPURegisterBankInfo::getInstrMapping(
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
break;
}
+ case Intrinsic::amdgcn_writelane: {
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
+ unsigned SrcBank = getRegBankID(SrcReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+ unsigned IdxReg = MI.getOperand(3).getReg();
+ unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
+ unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
+
+ // These 2 must be SGPRs, but accept VGPRs. Readfirstlane will be inserted
+ // to legalize.
+ OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
+ OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
+ OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
+ break;
+ }
}
break;
}
Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir?rev=364808&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir Mon Jul 1 09:41:36 2019
@@ -0,0 +1,98 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+
+---
+name: writelane_sss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: writelane_sss
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY3]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
+...
+
+---
+name: writelane_ssv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $vgpr0
+ ; CHECK-LABEL: name: writelane_ssv
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $vgpr0
+ %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
+...
+
+---
+name: writelane_vsv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: writelane_vsv
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]], [[COPY1]](s32), [[COPY2]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr0
+ %2:_(s32) = COPY $vgpr1
+ %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
+...
+
+---
+name: writelane_vvv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: writelane_vvv
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
+ ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]], [[V_READFIRSTLANE_B32_1]], [[COPY2]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
+...
+
+---
+name: writelane_svv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: writelane_svv
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]], [[COPY2]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr0
+ %2:_(s32) = COPY $vgpr1
+ %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
+...
More information about the llvm-commits
mailing list