[llvm] edd0dfc - AMDGPU/GlobalISel: Refine G_TRUNC legality rules
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 10 15:32:49 PDT 2020
Author: Matt Arsenault
Date: 2020-03-10T15:32:22-07:00
New Revision: edd0dfca0dbdcdaa7cdc3861ccecda6cc5bb9b3a
URL: https://github.com/llvm/llvm-project/commit/edd0dfca0dbdcdaa7cdc3861ccecda6cc5bb9b3a
DIFF: https://github.com/llvm/llvm-project/commit/edd0dfca0dbdcdaa7cdc3861ccecda6cc5bb9b3a.diff
LOG: AMDGPU/GlobalISel: Refine G_TRUNC legality rules
Scalarize most truncates. Avoid touching cases that could end up in
unresolvable infinite loops.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 381446ff95a6..63cc5246168c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -170,6 +170,16 @@ static LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT Type) {
};
}
+static LegalityPredicate elementTypeIsLegal(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ if (!QueryTy.isVector())
+ return false;
+ const LLT EltTy = QueryTy.getElementType();
+ return EltTy == LLT::scalar(16) || EltTy.getSizeInBits() >= 32;
+ };
+}
+
static LegalityPredicate isWideScalarTruncStore(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT Ty = Query.Types[TypeIdx];
@@ -484,8 +494,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
FMad.scalarize(0)
.lower();
+ // TODO: Do we need to clamp maximum bitwidth?
getActionDefinitionsBuilder(G_TRUNC)
+ .legalIf(isScalar(0))
+ .legalFor({{V2S16, V2S32}})
.clampMaxNumElements(0, S16, 2)
+ // Avoid scalarizing in cases that should be truly illegal. In unresolvable
+ // situations (like an invalid implicit use), we don't want to infinite loop
+ // in the legalizer.
+ .fewerElementsIf(elementTypeIsLegal(0), LegalizeMutations::scalarize(0))
.alwaysLegal();
getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
index 0b60a5efe620..a306ee696190 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
@@ -310,10 +310,15 @@ body: |
; CHECK-LABEL: name: test_unmerge_values_s32_of_trunc_concat_vectors_v2s64_v2s64
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[COPY1]](<2 x s64>)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s32>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s64>)
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](<4 x s32>)
- ; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32)
+ ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+ ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64)
+ ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
+ ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32), [[TRUNC2]](s32), [[TRUNC3]](s32)
+ ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK: S_ENDPGM 0, implicit [[UV4]](s32), implicit [[UV5]](s32), implicit [[UV6]](s32), implicit [[UV7]](s32)
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
%2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir
index 62b3ac1d48e8..3c74a7f9c20b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir
@@ -705,9 +705,10 @@ body: |
; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32)
; SI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32)
- ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64)
- ; SI: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>)
- ; SI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC]], [[BUILD_VECTOR]]
+ ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+ ; SI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64)
+ ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+ ; SI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]]
; SI: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
; SI: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>)
; VI-LABEL: name: test_copysign_v2s32_v2s64
@@ -723,9 +724,10 @@ body: |
; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32)
; VI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32)
- ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64)
- ; VI: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>)
- ; VI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC]], [[BUILD_VECTOR]]
+ ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+ ; VI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64)
+ ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]]
; VI: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
; VI: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>)
; GFX9-LABEL: name: test_copysign_v2s32_v2s64
@@ -741,9 +743,10 @@ body: |
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32)
; GFX9: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32)
- ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64)
- ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>)
- ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC]], [[BUILD_VECTOR]]
+ ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+ ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64)
+ ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+ ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]]
; GFX9: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
; GFX9: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir
index 820adda529b5..640dc1e52dbe 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir
@@ -127,8 +127,17 @@ body: |
; CHECK-LABEL: name: test_trunc_v2s64_to_v2s16
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s64>)
- ; CHECK: $vgpr0 = COPY [[TRUNC]](<2 x s16>)
+ ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64)
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64)
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x s16>) = G_TRUNC %0
$vgpr0 = COPY %1
@@ -143,9 +152,25 @@ body: |
; CHECK-LABEL: name: test_trunc_v4s64_to_v4s16
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; CHECK: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s64>)
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s64>)
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
+ ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>)
+ ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64)
+ ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]]
+ ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64)
+ ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]]
+ ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
%1:_(<4 x s16>) = G_TRUNC %0
@@ -213,3 +238,166 @@ body: |
%1:_(<4 x s1>) = G_TRUNC %0
S_ENDPGM 0, implicit %1
...
+
+---
+name: test_trunc_v2s64_to_v2s32
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+ ; CHECK-LABEL: name: test_trunc_v2s64_to_v2s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(<2 x s32>) = G_TRUNC %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_trunc_v4s64_to_v4s32
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+
+ ; CHECK-LABEL: name: test_trunc_v4s64_to_v4s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64)
+ ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
+ ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32), [[TRUNC2]](s32), [[TRUNC3]](s32)
+ ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ %1:_(<4 x s32>) = G_TRUNC %0
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_trunc_v2s128_to_v2s64
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+
+ ; CHECK-LABEL: name: test_trunc_v2s128_to_v2s64
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[UV]](s128)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[UV1]](s128)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[TRUNC]](s64), [[TRUNC1]](s64)
+ ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ %1:_(<2 x s64>) = G_TRUNC %0
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_trunc_v2s128_to_v2s32
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+
+ ; CHECK-LABEL: name: test_trunc_v2s128_to_v2s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s128)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s128)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ %1:_(<2 x s32>) = G_TRUNC %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_trunc_v2s128_to_v2s16
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+
+ ; CHECK-LABEL: name: test_trunc_v2s128_to_v2s16
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s128)
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s128)
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ %1:_(<2 x s16>) = G_TRUNC %0
+ $vgpr0 = COPY %1
+...
+
+---
+name: test_trunc_v2s96_to_v2s32
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
+
+ ; CHECK-LABEL: name: test_trunc_v2s96_to_v2s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s96)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s96)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+ %2:_(<2 x s96>) = G_BUILD_VECTOR %0, %1
+ %3:_(<2 x s32>) = G_TRUNC %2
+ $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: test_trunc_v2s96_to_v2s16
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
+
+ ; CHECK-LABEL: name: test_trunc_v2s96_to_v2s16
+ ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s96)
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s96)
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+ %2:_(<2 x s96>) = G_BUILD_VECTOR %0, %1
+ %3:_(<2 x s16>) = G_TRUNC %2
+ $vgpr0 = COPY %3
+...
+
+---
+name: test_trunc_v2s96_to_v2s8
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
+
+ ; CHECK-LABEL: name: test_trunc_v2s96_to_v2s8
+ ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s96>) = G_BUILD_VECTOR [[COPY]](s96), [[COPY1]](s96)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s96>)
+ ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](<2 x s8>)
+ %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+ %2:_(<2 x s96>) = G_BUILD_VECTOR %0, %1
+ %3:_(<2 x s8>) = G_TRUNC %2
+ S_ENDPGM 0, implicit %3
+...
More information about the llvm-commits
mailing list