[llvm] r371423 - AMDGPU/GlobalISel: Legalize G_BUILD_VECTOR_TRUNC
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 9 10:04:18 PDT 2019
Author: arsenm
Date: Mon Sep 9 10:04:18 2019
New Revision: 371423
URL: http://llvm.org/viewvc/llvm-project?rev=371423&view=rev
Log:
AMDGPU/GlobalISel: Legalize G_BUILD_VECTOR_TRUNC
Treat this as legal on gfx9, since gfx9 can use the S_PACK_* instructions
to implement it.
This isn't used by anything yet. The same will probably apply to
16-bit G_BUILD_VECTOR without the trunc.
Added:
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector-trunc.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp?rev=371423&r1=371422&r2=371423&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp Mon Sep 9 10:04:18 2019
@@ -719,6 +719,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
.legalIf(isRegisterType(0))
.minScalarOrElt(0, S32);
+ if (ST.hasScalarPackInsts()) {
+ getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
+ .legalFor({V2S16, S32})
+ .lower();
+ } else {
+ getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
+ .lower();
+ }
+
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
.legalIf(isRegisterType(0));
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp?rev=371423&r1=371422&r2=371423&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Mon Sep 9 10:04:18 2019
@@ -1305,6 +1305,44 @@ void AMDGPURegisterBankInfo::applyMappin
MI.eraseFromParent();
return;
}
+ case AMDGPU::G_BUILD_VECTOR_TRUNC: {
+ assert(MI.getNumOperands() == 3 && empty(OpdMapper.getVRegs(0)));
+ substituteSimpleCopyRegs(OpdMapper, 1);
+ substituteSimpleCopyRegs(OpdMapper, 2);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
+ if (DstBank == &AMDGPU::SGPRRegBank)
+ break; // Can use S_PACK_* instructions.
+
+ MachineIRBuilder B(MI);
+
+ Register Lo = MI.getOperand(1).getReg();
+ Register Hi = MI.getOperand(2).getReg();
+
+ const RegisterBank *BankLo = getRegBank(Lo, MRI, *TRI);
+ const RegisterBank *BankHi = getRegBank(Hi, MRI, *TRI);
+
+ const LLT S32 = LLT::scalar(32);
+ auto MaskLo = B.buildConstant(S32, 0xffff);
+ MRI.setRegBank(MaskLo.getReg(0), *BankLo);
+
+ auto ShiftAmt = B.buildConstant(S32, 16);
+ MRI.setRegBank(ShiftAmt.getReg(0), *BankHi);
+
+ auto ShiftHi = B.buildShl(S32, Hi, ShiftAmt);
+ MRI.setRegBank(ShiftHi.getReg(0), *BankHi);
+
+ auto Masked = B.buildAnd(S32, Lo, MaskLo);
+ MRI.setRegBank(Masked.getReg(0), *BankLo);
+
+ auto Or = B.buildOr(S32, Masked, ShiftHi);
+ MRI.setRegBank(Or.getReg(0), *DstBank);
+
+ B.buildBitcast(DstReg, Or);
+ MI.eraseFromParent();
+ return;
+ }
case AMDGPU::G_EXTRACT_VECTOR_ELT:
applyDefaultMapping(OpdMapper);
executeInWaterfallLoop(MI, MRI, { 2 });
@@ -1513,6 +1551,11 @@ AMDGPURegisterBankInfo::getRegBankID(Reg
return Bank ? Bank->getID() : Default;
}
+static unsigned regBankUnion(unsigned RB0, unsigned RB1) { // Combine two register bank IDs into one result bank ID.
+ return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ? // SGPR only when both inputs are SGPR.
+ AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; // Any other combination yields VGPR.
+}
+
///
/// This function must return a legal mapping, because
/// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
@@ -1774,6 +1817,20 @@ AMDGPURegisterBankInfo::getInstrMapping(
OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
break;
}
+ case AMDGPU::G_BUILD_VECTOR_TRUNC: {
+ assert(MI.getNumOperands() == 3);
+
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+ unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+ unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);
+
+ OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
+ OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
+ break;
+ }
case AMDGPU::G_BITCAST:
case AMDGPU::G_INTTOPTR:
case AMDGPU::G_PTRTOINT:
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=371423&r1=371422&r2=371423&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Mon Sep 9 10:04:18 2019
@@ -551,6 +551,10 @@ public:
return GFX9Insts;
}
+ bool hasScalarPackInsts() const { // Subtarget query for the scalar pack (S_PACK_*) instructions.
+ return GFX9Insts; // True exactly when the GFX9Insts flag is set.
+ }
+
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}
Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir?rev=371423&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir Mon Sep 9 10:04:18 2019
@@ -0,0 +1,19 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+
+---
+name: legal_s32_to_v2s16
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; GFX9-LABEL: name: legal_s32_to_v2s16
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
+ S_NOP 0, implicit %2
+...
Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector-trunc.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector-trunc.mir?rev=371423&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector-trunc.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector-trunc.mir Mon Sep 9 10:04:18 2019
@@ -0,0 +1,83 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: build_vector_trunc_v2s16_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_ss
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
+...
+
+---
+name: build_vector_trunc_v2s16_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_sv
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY1]], [[C1]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr0
+ %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
+...
+
+---
+name: build_vector_trunc_v2s16_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr0
+ ; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_vs
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY1]], [[C1]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr0
+ %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
+...
+
+---
+name: build_vector_trunc_v2s16_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_vv
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY1]], [[C1]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
+...
More information about the llvm-commits
mailing list