[llvm] r371440 - AMDGPU/GlobalISel: Legalize G_BUILD_VECTOR v2s16
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 9 11:57:51 PDT 2019
Author: arsenm
Date: Mon Sep 9 11:57:51 2019
New Revision: 371440
URL: http://llvm.org/viewvc/llvm-project?rev=371440&view=rev
Log:
AMDGPU/GlobalISel: Legalize G_BUILD_VECTOR v2s16
Handle it the same way as G_BUILD_VECTOR_TRUNC. Arguably only
G_BUILD_VECTOR_TRUNC should be legal for this, but G_BUILD_VECTOR will
probably be more convenient in most cases.
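For context, here is a minimal sketch (not part of the commit; register numbering and banks are assumed, modeled on the new MIR test added below) of the two opcode forms and of the lowering the register bank selector now applies to G_BUILD_VECTOR when the result cannot use the scalar S_PACK_* instructions:

  ; G_BUILD_VECTOR builds <2 x s16> directly from s16 sources:
  %4:_(<2 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16)

  ; G_BUILD_VECTOR_TRUNC takes wider sources and truncates them implicitly:
  %4:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0(s32), %1(s32)

  ; VGPR-bank lowering of the G_BUILD_VECTOR form (compare the _vv test below):
  %5:vgpr(s32) = G_ZEXT %2(s16)
  %6:vgpr(s32) = G_ZEXT %3(s16)
  %7:vgpr(s32) = G_CONSTANT i32 16
  %8:vgpr(s32) = G_SHL %6, %7(s32)
  %9:vgpr(s32) = G_OR %5, %8
  %10:vgpr(<2 x s16>) = G_BITCAST %9(s32)

The structure matches the existing G_BUILD_VECTOR_TRUNC lowering, except that the s16 sources are first zero-extended to s32 rather than masking an already-s32 low half with 0xffff.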
Added:
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.v2s16.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp?rev=371440&r1=371439&r2=371440&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp Mon Sep 9 11:57:51 2019
@@ -713,14 +713,19 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
}
- getActionDefinitionsBuilder(G_BUILD_VECTOR)
- .legalForCartesianProduct(AllS32Vectors, {S32})
- .legalForCartesianProduct(AllS64Vectors, {S64})
- .clampNumElements(0, V16S32, V16S32)
- .clampNumElements(0, V2S64, V8S64)
- .minScalarSameAs(1, 0)
- .legalIf(isRegisterType(0))
- .minScalarOrElt(0, S32);
+ auto &BuildVector = getActionDefinitionsBuilder(G_BUILD_VECTOR)
+ .legalForCartesianProduct(AllS32Vectors, {S32})
+ .legalForCartesianProduct(AllS64Vectors, {S64})
+ .clampNumElements(0, V16S32, V16S32)
+ .clampNumElements(0, V2S64, V8S64);
+
+ if (ST.hasScalarPackInsts())
+ BuildVector.legalFor({V2S16, S32});
+
+ BuildVector
+ .minScalarSameAs(1, 0)
+ .legalIf(isRegisterType(0))
+ .minScalarOrElt(0, S32);
if (ST.hasScalarPackInsts()) {
getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp?rev=371440&r1=371439&r2=371440&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Mon Sep 9 11:57:51 2019
@@ -1305,12 +1305,17 @@ void AMDGPURegisterBankInfo::applyMappin
MI.eraseFromParent();
return;
}
+ case AMDGPU::G_BUILD_VECTOR:
case AMDGPU::G_BUILD_VECTOR_TRUNC: {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy != LLT::vector(2, 16))
+ break;
+
assert(MI.getNumOperands() == 3 && empty(OpdMapper.getVRegs(0)));
substituteSimpleCopyRegs(OpdMapper, 1);
substituteSimpleCopyRegs(OpdMapper, 2);
- Register DstReg = MI.getOperand(0).getReg();
const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
if (DstBank == &AMDGPU::SGPRRegBank)
break; // Can use S_PACK_* instructions.
@@ -1319,24 +1324,41 @@ void AMDGPURegisterBankInfo::applyMappin
Register Lo = MI.getOperand(1).getReg();
Register Hi = MI.getOperand(2).getReg();
+ const LLT S32 = LLT::scalar(32);
const RegisterBank *BankLo = getRegBank(Lo, MRI, *TRI);
const RegisterBank *BankHi = getRegBank(Hi, MRI, *TRI);
- const LLT S32 = LLT::scalar(32);
- auto MaskLo = B.buildConstant(S32, 0xffff);
- MRI.setRegBank(MaskLo.getReg(0), *BankLo);
+ Register ZextLo;
+ Register ShiftHi;
- auto ShiftAmt = B.buildConstant(S32, 16);
- MRI.setRegBank(ShiftAmt.getReg(0), *BankHi);
+ if (Opc == AMDGPU::G_BUILD_VECTOR) {
+ ZextLo = B.buildZExt(S32, Lo).getReg(0);
+ MRI.setRegBank(ZextLo, *BankLo);
- auto ShiftHi = B.buildShl(S32, Hi, ShiftAmt);
- MRI.setRegBank(ShiftHi.getReg(0), *BankHi);
+ Register ZextHi = B.buildZExt(S32, Hi).getReg(0);
+ MRI.setRegBank(ZextHi, *BankHi);
- auto Masked = B.buildAnd(S32, Lo, MaskLo);
- MRI.setRegBank(Masked.getReg(0), *BankLo);
+ auto ShiftAmt = B.buildConstant(S32, 16);
+ MRI.setRegBank(ShiftAmt.getReg(0), *BankHi);
- auto Or = B.buildOr(S32, Masked, ShiftHi);
+ ShiftHi = B.buildShl(S32, ZextHi, ShiftAmt).getReg(0);
+ MRI.setRegBank(ShiftHi, *BankHi);
+ } else {
+ Register MaskLo = B.buildConstant(S32, 0xffff).getReg(0);
+ MRI.setRegBank(MaskLo, *BankLo);
+
+ auto ShiftAmt = B.buildConstant(S32, 16);
+ MRI.setRegBank(ShiftAmt.getReg(0), *BankHi);
+
+ ShiftHi = B.buildShl(S32, Hi, ShiftAmt).getReg(0);
+ MRI.setRegBank(ShiftHi, *BankHi);
+
+ ZextLo = B.buildAnd(S32, Lo, MaskLo).getReg(0);
+ MRI.setRegBank(ZextLo, *BankLo);
+ }
+
+ auto Or = B.buildOr(S32, ZextLo, ShiftHi);
MRI.setRegBank(Or.getReg(0), *DstBank);
B.buildBitcast(DstReg, Or);
@@ -1804,8 +1826,25 @@ AMDGPURegisterBankInfo::getInstrMapping(
OpdsMapping[2] = nullptr;
break;
}
- case AMDGPU::G_MERGE_VALUES:
case AMDGPU::G_BUILD_VECTOR:
+ case AMDGPU::G_BUILD_VECTOR_TRUNC: {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ if (DstTy == LLT::vector(2, 16)) {
+ unsigned DstSize = DstTy.getSizeInBits();
+ unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+ unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+ unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);
+
+ OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
+ OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
+ break;
+ }
+
+ LLVM_FALLTHROUGH;
+ }
+ case AMDGPU::G_MERGE_VALUES:
case AMDGPU::G_CONCAT_VECTORS: {
unsigned Bank = isSALUMapping(MI) ?
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
@@ -1818,20 +1857,6 @@ AMDGPURegisterBankInfo::getInstrMapping(
OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
break;
}
- case AMDGPU::G_BUILD_VECTOR_TRUNC: {
- assert(MI.getNumOperands() == 3);
-
- unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
- unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
- unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);
-
- OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
- OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
- OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
- break;
- }
case AMDGPU::G_BITCAST:
case AMDGPU::G_INTTOPTR:
case AMDGPU::G_PTRTOINT:
Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.v2s16.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.v2s16.mir?rev=371440&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.v2s16.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.v2s16.mir Mon Sep 9 11:57:51 2019
@@ -0,0 +1,99 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: build_vector_v2s16_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: build_vector_v2s16_s32_ss
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3
+...
+
+---
+name: build_vector_v2s16_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; CHECK-LABEL: name: build_vector_v2s16_s32_sv
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16)
+ ; CHECK: [[ZEXT1:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC1]](s16)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[ZEXT]], [[SHL]]
+ ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr0
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3
+...
+
+---
+name: build_vector_v2s16_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr0
+ ; CHECK-LABEL: name: build_vector_v2s16_s32_vs
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s16)
+ ; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16)
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[ZEXT]], [[SHL]]
+ ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr0
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3
+...
+
+---
+name: build_vector_v2s16_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: build_vector_v2s16_s32_vv
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s16)
+ ; CHECK: [[ZEXT1:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC1]](s16)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[ZEXT]], [[SHL]]
+ ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3
+...