[llvm] c5497e5 - AMDGPU/GlobalISel: Fix legalizing <3 x s16> vselects
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 11 13:02:20 PDT 2020
Author: Matt Arsenault
Date: 2020-04-11T15:59:51-04:00
New Revision: c5497e539990e4987afd8ec7263f7935ee6cd41f
URL: https://github.com/llvm/llvm-project/commit/c5497e539990e4987afd8ec7263f7935ee6cd41f
DIFF: https://github.com/llvm/llvm-project/commit/c5497e539990e4987afd8ec7263f7935ee6cd41f.diff
LOG: AMDGPU/GlobalISel: Fix legalizing <3 x s16> vselects
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 5ee3267822b7..9f955b821cc8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1066,9 +1066,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1, S32})
.clampScalar(0, S16, S64)
+ .scalarize(1)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.fewerElementsIf(numElementsNotEven(0), scalarize(0))
- .scalarize(1)
.clampMaxNumElements(0, S32, 2)
.clampMaxNumElements(0, LocalPtr, 2)
.clampMaxNumElements(0, PrivatePtr, 2)
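
A note on the hunk above: LegalizeRuleSet applies the first rule whose predicate matches, so moving .scalarize(1) ahead of the moreElementsIf/fewerElementsIf rules changes which action fires for a G_SELECT whose condition (operand 1) is a vector. The reading suggested by the tests below is: with the old order, moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) widened a <3 x s16> result to <4 x s16> while the <3 x s1> condition was left alone; with the new order, a vector-condition select is broken into scalar selects first, and the widening rule only ever sees scalar-condition selects. Here is a minimal standalone sketch of that first-match behavior; none of the names below (SelectTypes, Rule, legalize) are LLVM's real API, they just model the ordering:

  // Models LegalizeRuleSet's first-match rule application: the
  // relative order of the rules decides the action taken.
  #include <functional>
  #include <iostream>
  #include <string>
  #include <vector>

  struct SelectTypes {
    int ResultElts; // elements in operand 0 (result)
    int CondElts;   // elements in operand 1 (condition)
  };

  struct Rule {
    std::function<bool(const SelectTypes &)> Pred;
    std::string Action;
  };

  // First rule whose predicate matches wins, as in LegalizeRuleSet.
  static std::string legalize(const SelectTypes &T,
                              const std::vector<Rule> &Rules) {
    for (const Rule &R : Rules)
      if (R.Pred(T))
        return R.Action;
    return "legal";
  }

  int main() {
    // A <3 x s16> vselect: odd 3-element result, <3 x s1> condition.
    SelectTypes V3S16Select{/*ResultElts=*/3, /*CondElts=*/3};

    auto IsSmallOddVector = [](const SelectTypes &T) {
      return T.ResultElts > 1 && T.ResultElts % 2 == 1;
    };
    auto HasVectorCond = [](const SelectTypes &T) { return T.CondElts > 1; };

    // Old order: the odd result vector is padded before the vector
    // condition is ever scalarized.
    std::vector<Rule> OldOrder = {
        {IsSmallOddVector, "moreElements(0): pad result to even vector"},
        {HasVectorCond, "scalarize(1): split into scalar selects"},
    };

    // New order: vector-condition selects are scalarized first; the
    // scalar selects produced are then re-queried against the rules.
    std::vector<Rule> NewOrder = {
        {HasVectorCond, "scalarize(1): split into scalar selects"},
        {IsSmallOddVector, "moreElements(0): pad result to even vector"},
    };

    std::cout << "old order picks: " << legalize(V3S16Select, OldOrder) << "\n";
    std::cout << "new order picks: " << legalize(V3S16Select, NewOrder) << "\n";
  }
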
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
index a65997a63788..26b69109da46 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
@@ -1197,3 +1197,142 @@ body: |
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5
...
+
+---
+name: test_vselect_v3s8
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8, $vgpr9_vgpr10_vgpr11
+ ; CHECK-LABEL: name: test_vselect_v3s8
+ ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+ ; CHECK: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+ ; CHECK: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr9_vgpr10_vgpr11
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]]
+ ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]]
+ ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]]
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY2]](<3 x s32>)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY3]](<3 x s32>)
+ ; CHECK: [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<3 x s8>)
+ ; CHECK: [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC1]](<3 x s8>)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8)
+ ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8)
+ ; CHECK: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]]
+ ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8)
+ ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8)
+ ; CHECK: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[ANYEXT2]], [[ANYEXT3]]
+ ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8)
+ ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8)
+ ; CHECK: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[ANYEXT4]], [[ANYEXT5]]
+ ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16)
+ ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16)
+ ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16)
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ANYEXT6]](s32)
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ANYEXT7]](s32)
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ANYEXT8]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32)
+ ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+ %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+ %3:_(<3 x s32>) = COPY $vgpr9_vgpr10_vgpr11
+ %4:_(<3 x s1>) = G_ICMP intpred(ne), %0, %1
+ %5:_(<3 x s8>) = G_TRUNC %2
+ %6:_(<3 x s8>) = G_TRUNC %3
+ %7:_(<3 x s8>) = G_SELECT %4, %5, %6
+ %8:_(<3 x s32>) = G_ANYEXT %7
+ $vgpr0_vgpr1_vgpr2 = COPY %8
+
+...
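
(In the v3s8 case above, the new ordering scalarizes the <3 x s1> condition first; each resulting scalar s8 select is then widened to s16 by the existing .clampScalar(0, S16, S64) rule visible in the hunk, which is why the checks show each G_SELECT operating on s16 values wrapped in G_ANYEXT/G_TRUNC.)
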
+
+---
+name: test_vselect_v3s16
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8, $vgpr9_vgpr10_vgpr11
+ ; CHECK-LABEL: name: test_vselect_v3s16
+ ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+ ; CHECK: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+ ; CHECK: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr9_vgpr10_vgpr11
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]]
+ ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]]
+ ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]]
+ ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV8]](s32), [[DEF]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
+ ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
+ ; CHECK: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[UV10]](s32)
+ ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV11]](s32), [[DEF]](s32)
+ ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>)
+ ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR3]](<2 x s32>)
+ ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>), [[DEF1]](<2 x s16>)
+ ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
+ ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
+ ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+ ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+ ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
+ ; CHECK: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+ ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
+ ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
+ ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+ ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; CHECK: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC4]], [[TRUNC7]]
+ ; CHECK: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC5]], [[TRUNC8]]
+ ; CHECK: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC6]], [[TRUNC9]]
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16)
+ ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16)
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
+ ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+ ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
+ ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0
+ ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0
+ ; CHECK: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+ ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+ ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+ ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+ ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32)
+ ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR4]](<3 x s32>)
+ %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+ %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+ %3:_(<3 x s32>) = COPY $vgpr9_vgpr10_vgpr11
+ %4:_(<3 x s1>) = G_ICMP intpred(ne), %0, %1
+ %5:_(<3 x s16>) = G_TRUNC %2
+ %6:_(<3 x s16>) = G_TRUNC %3
+ %7:_(<3 x s16>) = G_SELECT %4, %5, %6
+ %8:_(<3 x s32>) = G_ANYEXT %7
+ $vgpr0_vgpr1_vgpr2 = COPY %8
+
+...
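
(The v3s16 case is longer because <3 x s16> results are handled by padding into <4 x s16> via G_INSERT/G_EXTRACT and picking out halves with G_BITCAST plus a 16-bit G_LSHR, as the checks show. To rerun the new tests locally, the authoritative RUN line is at the top of legalize-select.mir, but the general shape of such MIR legalizer tests is:

  llc -march=amdgcn -O0 -run-pass=legalizer \
      llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir -o - \
    | FileCheck llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir

CHECK lines of this form are typically autogenerated with llvm/utils/update_mir_test_checks.py rather than written by hand.)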