[PATCH] D71924: AMDGPU/GlobalISel: Legalize 2-element dynamic vector insert
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 27 05:51:10 PST 2019
arsenm created this revision.
arsenm added reviewers: tstellar, nhaehnle, kerbowa.
Herald added subscribers: Petar.Avramovic, hiraditya, t-tye, tpr, dstuttard, rovka, yaxunl, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.
For a dynamic (non-constant) index into a 2-element vector, compare the
index against zero and select each element, which is cheaper and simpler
to implement than register indexing.
https://reviews.llvm.org/D71924
Files:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -67,8 +67,13 @@
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
- ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32)
- ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]]
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[UV]]
+ ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[COPY1]]
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s64) = COPY $vgpr3_vgpr4
@@ -131,8 +136,13 @@
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32)
; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
- ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[ASHR]](s32)
- ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
+ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ASHR]](s32), [[C2]]
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[UV]]
+ ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[COPY1]]
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s8) = G_CONSTANT i8 0
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1537,17 +1537,36 @@
MachineIRBuilder &B) const {
// TODO: Should move some of this into LegalizerHelper.
- // TODO: Promote dynamic indexing of s16 to s32
- // TODO: Dynamic s64 indexing is only legal for SGPR.
- Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
- if (!IdxVal) // Dynamic case will be selected to register indexing.
- return true;
-
Register Dst = MI.getOperand(0).getReg();
Register Vec = MI.getOperand(1).getReg();
Register Ins = MI.getOperand(2).getReg();
-
+ Register Idx = MI.getOperand(3).getReg();
LLT VecTy = MRI.getType(Vec);
+
+ // TODO: Promote dynamic indexing of s16 to s32
+ // TODO: Dynamic s64 indexing is only legal for SGPR.
+ Optional<int64_t> IdxVal = getConstantVRegVal(Idx, MRI);
+ if (!IdxVal) {
+ // Dynamic case will be selected to register indexing.
+ if (VecTy.getNumElements() != 2)
+ return true;
+
+ // Compare and select is a lot cheaper than indexing in small cases.
+ LLT EltTy = VecTy.getElementType();
+ assert(MRI.getType(Ins) == EltTy);
+
+ B.setInstr(MI);
+ auto Zero = B.buildConstant(LLT::scalar(32), 0);
+ auto Eq0 = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Idx, Zero);
+ auto Unmerge = B.buildUnmerge(EltTy, Vec);
+
+ auto Lo = B.buildSelect(EltTy, Eq0, Ins, Unmerge.getReg(0));
+ auto Hi = B.buildSelect(EltTy, Eq0, Unmerge.getReg(1), Ins);
+ B.buildBuildVector(Dst, {Lo.getReg(0), Hi.getReg(0)});
+ MI.eraseFromParent();
+ return true;
+ }
+
LLT EltTy = VecTy.getElementType();
assert(EltTy == MRI.getType(Ins));
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D71924.235406.patch
Type: text/x-patch
Size: 4179 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191227/b6d3cbb4/attachment.bin>
More information about the llvm-commits mailing list