[llvm] edd0dfc - AMDGPU/GlobalISel: Refine G_TRUNC legality rules

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 10 15:32:49 PDT 2020


Author: Matt Arsenault
Date: 2020-03-10T15:32:22-07:00
New Revision: edd0dfca0dbdcdaa7cdc3861ccecda6cc5bb9b3a

URL: https://github.com/llvm/llvm-project/commit/edd0dfca0dbdcdaa7cdc3861ccecda6cc5bb9b3a
DIFF: https://github.com/llvm/llvm-project/commit/edd0dfca0dbdcdaa7cdc3861ccecda6cc5bb9b3a.diff

LOG: AMDGPU/GlobalISel: Refine G_TRUNC legality rules

Scalarize most truncates. Avoid touching cases that could end up in
unresolvable infinite loops.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 381446ff95a6..63cc5246168c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -170,6 +170,16 @@ static LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT Type) {
   };
 }
 
+static LegalityPredicate elementTypeIsLegal(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT QueryTy = Query.Types[TypeIdx];
+    if (!QueryTy.isVector())
+      return false;
+    const LLT EltTy = QueryTy.getElementType();
+    return EltTy == LLT::scalar(16) || EltTy.getSizeInBits() >= 32;
+  };
+}
+
 static LegalityPredicate isWideScalarTruncStore(unsigned TypeIdx) {
   return [=](const LegalityQuery &Query) {
     const LLT Ty = Query.Types[TypeIdx];
@@ -484,8 +494,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
   FMad.scalarize(0)
       .lower();
 
+  // TODO: Do we need to clamp maximum bitwidth?
   getActionDefinitionsBuilder(G_TRUNC)
+    .legalIf(isScalar(0))
+    .legalFor({{V2S16, V2S32}})
     .clampMaxNumElements(0, S16, 2)
+    // Avoid scalarizing in cases that should be truly illegal. In unresolvable
+    // situations (like an invalid implicit use), we don't want to infinite loop
+    // in the legalizer.
+    .fewerElementsIf(elementTypeIsLegal(0), LegalizeMutations::scalarize(0))
     .alwaysLegal();
 
   getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
index 0b60a5efe620..a306ee696190 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
@@ -310,10 +310,15 @@ body:             |
     ; CHECK-LABEL: name: test_unmerge_values_s32_of_trunc_concat_vectors_v2s64_v2s64
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[COPY1]](<2 x s64>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s32>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s64>)
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](<4 x s32>)
-    ; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32), implicit [[UV3]](s32)
+    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32), [[TRUNC2]](s32), [[TRUNC3]](s32)
+    ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+    ; CHECK: S_ENDPGM 0, implicit [[UV4]](s32), implicit [[UV5]](s32), implicit [[UV6]](s32), implicit [[UV7]](s32)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir
index 62b3ac1d48e8..3c74a7f9c20b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir
@@ -705,9 +705,10 @@ body: |
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32)
     ; SI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32)
-    ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>)
-    ; SI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC]], [[BUILD_VECTOR]]
+    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64)
+    ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]]
     ; SI: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
     ; SI: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>)
     ; VI-LABEL: name: test_copysign_v2s32_v2s64
@@ -723,9 +724,10 @@ body: |
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32)
     ; VI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>)
-    ; VI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC]], [[BUILD_VECTOR]]
+    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64)
+    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]]
     ; VI: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
     ; VI: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>)
     ; GFX9-LABEL: name: test_copysign_v2s32_v2s64
@@ -741,9 +743,10 @@ body: |
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32)
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>)
-    ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC]], [[BUILD_VECTOR]]
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64)
+    ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]]
     ; GFX9: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
     ; GFX9: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir
index 820adda529b5..640dc1e52dbe 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir
@@ -127,8 +127,17 @@ body: |
 
     ; CHECK-LABEL: name: test_trunc_v2s64_to_v2s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s64>)
-    ; CHECK: $vgpr0 = COPY [[TRUNC]](<2 x s16>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s16>) = G_TRUNC %0
     $vgpr0 = COPY %1
@@ -143,9 +152,25 @@ body: |
     ; CHECK-LABEL: name: test_trunc_v4s64_to_v4s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; CHECK: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s64>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s64>)
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
+    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]]
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
     ; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(<4 x s16>) = G_TRUNC %0
@@ -213,3 +238,166 @@ body: |
     %1:_(<4 x s1>) = G_TRUNC %0
     S_ENDPGM 0, implicit %1
 ...
+
+---
+name: test_trunc_v2s64_to_v2s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_trunc_v2s64_to_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x s32>) = G_TRUNC %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_trunc_v4s64_to_v4s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+
+    ; CHECK-LABEL: name: test_trunc_v4s64_to_v4s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32), [[TRUNC2]](s32), [[TRUNC3]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:_(<4 x s32>) = G_TRUNC %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_trunc_v2s128_to_v2s64
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+
+    ; CHECK-LABEL: name: test_trunc_v2s128_to_v2s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[UV]](s128)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[UV1]](s128)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[TRUNC]](s64), [[TRUNC1]](s64)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:_(<2 x s64>) = G_TRUNC %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_trunc_v2s128_to_v2s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+
+    ; CHECK-LABEL: name: test_trunc_v2s128_to_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s128)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s128)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:_(<2 x s32>) = G_TRUNC %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_trunc_v2s128_to_v2s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+
+    ; CHECK-LABEL: name: test_trunc_v2s128_to_v2s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s128)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s128)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:_(<2 x s16>) = G_TRUNC %0
+    $vgpr0 = COPY %1
+...
+
+---
+name: test_trunc_v2s96_to_v2s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
+
+    ; CHECK-LABEL: name: test_trunc_v2s96_to_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s96)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s96)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+    %2:_(<2 x s96>) = G_BUILD_VECTOR %0, %1
+    %3:_(<2 x s32>) = G_TRUNC %2
+    $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: test_trunc_v2s96_to_v2s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
+
+    ; CHECK-LABEL: name: test_trunc_v2s96_to_v2s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s96)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s96)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+    %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+    %2:_(<2 x s96>) = G_BUILD_VECTOR %0, %1
+    %3:_(<2 x s16>) = G_TRUNC %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: test_trunc_v2s96_to_v2s8
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
+
+    ; CHECK-LABEL: name: test_trunc_v2s96_to_v2s8
+    ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s96>) = G_BUILD_VECTOR [[COPY]](s96), [[COPY1]](s96)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s96>)
+    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](<2 x s8>)
+    %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+    %2:_(<2 x s96>) = G_BUILD_VECTOR %0, %1
+    %3:_(<2 x s8>) = G_TRUNC %2
+    S_ENDPGM 0, implicit %3
+...


        


More information about the llvm-commits mailing list