[llvm] cbc378e - GlobalISel: Artifact combine merge-like and unmerges into merge-like

Petar Avramovic via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 24 04:33:53 PDT 2022


Author: Petar Avramovic
Date: 2022-10-24T13:33:06+02:00
New Revision: cbc378ecb87e3f31dd5aff91f2a621d500640412

URL: https://github.com/llvm/llvm-project/commit/cbc378ecb87e3f31dd5aff91f2a621d500640412
DIFF: https://github.com/llvm/llvm-project/commit/cbc378ecb87e3f31dd5aff91f2a621d500640412.diff

LOG: GlobalISel: Artifact combine merge-like and unmerges into merge-like

Recognize when sub-vectors have been split to elements which are used to
build large vector.
This happens when instructions have different vector sizes available.
For example a few arithmetic instruction are required to process all
elements of larger vector that can be stored using one instruction.

Differential Revision: https://reviews.llvm.org/D109242

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
    llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 111d4a461d30..3a53017a4e1c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -901,6 +901,39 @@ class LegalizationArtifactCombiner {
         return true;
       }
 
+      // Recognize when multiple unmerged sources with UnmergeSrcTy type
+      // can be merged into Dst with DstTy type directly.
+      // Types have to be either both vector or both non-vector types.
+
+      // %0:_(EltTy), %1 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
+      // %2:_(EltTy), %3 = G_UNMERGE_VALUES %AnotherUnmergeSrc:_(UnmergeSrcTy)
+      // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1, %2, %3
+      //
+      // %Dst:_(DstTy) = G_merge_like_opcode %UnmergeSrc, %AnotherUnmergeSrc
+
+      if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
+          getCoverTy(DstTy, UnmergeSrcTy) == DstTy) {
+        SmallVector<Register, 4> ConcatSources;
+        unsigned NumElts = Unmerge->getNumDefs();
+        for (unsigned i = 0; i < MI.getNumSources(); i += NumElts) {
+          unsigned EltUnmergeIdx;
+          auto *UnmergeI = findUnmergeThatDefinesReg(MI.getSourceReg(i),
+                                                     EltSize, EltUnmergeIdx);
+          // All unmerges have to be the same size.
+          if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) ||
+              (EltUnmergeIdx != 0))
+            return false;
+          if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize))
+            return false;
+          ConcatSources.push_back(UnmergeI->getSourceReg());
+        }
+
+        MIB.setInstrAndDebugLoc(MI);
+        MIB.buildMerge(Dst, ConcatSources);
+        DeadInsts.push_back(&MI);
+        return true;
+      }
+
       return false;
     }
   };

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir
index 43e355774047..87949ebef75a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir
@@ -177,10 +177,8 @@ body: |
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
-    ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
+    ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>)
@@ -199,9 +197,8 @@ body: |
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV]](s32), [[UV1]](s32)
-    ; GFX9-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY]](<2 x s32>)
+    ; GFX9-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[CONCAT_VECTORS]](<4 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>)
     %3:_(<4 x s32>) = G_BUILD_VECTOR %1, %2, %1, %2

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
index 1e8fe3fbe4b0..1dcb2bf3e42a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -875,50 +875,7 @@ body: |
     ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
     ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[OR]](s32), [[LSHR]](s32)
     ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC]](<2 x s32>)
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C4]](s32)
-    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C5]](s32)
-    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C6]](s32)
-    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C4]](s32)
-    ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C5]](s32)
-    ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C6]](s32)
-    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
-    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
-    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C8]](s16)
-    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[SHL3]]
-    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
-    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
-    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
-    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND6]], [[C8]](s16)
-    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND5]], [[SHL4]]
-    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
-    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
-    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
-    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
-    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND8]], [[C8]](s16)
-    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND7]], [[SHL5]]
-    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
-    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
-    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32)
-    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
-    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND10]], [[C8]](s16)
-    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND9]], [[SHL6]]
-    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]]
-    ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
-    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL8]]
-    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR5]](s32), [[OR6]](s32)
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32)
     ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index 51232e2d26b4..ca121f6ed12c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -2753,10 +2753,8 @@ body: |
     ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; CI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s32>)
-    ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
-    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     ; VI-LABEL: name: test_load_flat_v32s8_align32
     ; VI: liveins: $vgpr0_vgpr1
     ; VI-NEXT: {{  $}}
@@ -2765,10 +2763,8 @@ body: |
     ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s32>)
-    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
-    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v32s8_align32
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{  $}}
@@ -2777,10 +2773,8 @@ body: |
     ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<4 x s32>) from unknown-address + 16)
-    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-    ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s32>)
-    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
-    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<32 x s8>) = G_LOAD %0 :: (load (<32 x s8>), align 32, addrspace 0)
     %2:_(<8 x s32>) = G_BITCAST %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
index 67f0d3629cc5..98b210cbcdaa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
@@ -717,90 +717,66 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
     ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
-    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
     ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]]
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
     ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
-    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
-    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
-    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
-    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
     ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C2]](s32)
-    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]]
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL3]]
     ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
-    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
-    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]]
-    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
-    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C2]](s32)
-    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
-    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C2]](s32)
-    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL5]]
-    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV2]](s64)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV1]](s64)
     ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s48) = G_EXTRACT [[DEF]](s64), 0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
-    ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV1]](s64), 0
-    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C]](s64)
+    ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV]](s64), 0
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]]
     ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s48)
     ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT1]](s48)
-    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]]
-    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND9]](s64)
-    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND10]](s64)
-    ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]]
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND6]](s64)
+    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
     ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
-    ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32)
-    ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
-    ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
+    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]]
+    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL1]]
+    ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
+    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
+    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]]
+    ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+    ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
     ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32)
-    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
-    ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
-    ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
-    ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
-    ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]]
+    ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
     ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32)
-    ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C3]]
-    ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
-    ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C2]](s32)
-    ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND15]], [[SHL8]]
-    ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C3]]
-    ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[SHL3]]
-    ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32)
-    ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C3]]
-    ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C2]](s32)
-    ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL9]]
-    ; CHECK-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]]
-    ; CHECK-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV9]], [[C3]]
-    ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C2]](s32)
-    ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL10]]
-    ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL9]]
-    ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[OR12]](s32)
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR11]](s32)
-    ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[UV8]](s32)
-    ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL3]]
-    ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]]
-    ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR13]](s32), [[OR14]](s32)
-    ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV9]](s384)
+    ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL5]]
+    ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[OR8]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR7]](s32)
+    ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[UV4]](s32)
+    ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL1]]
+    ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL1]]
+    ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR9]](s32), [[OR10]](s32)
+    ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[AND5]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV7]](s384)
     ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s112)
     %0:_(s32) = COPY $vgpr0
     %1:_(s2) = G_TRUNC %0


        


More information about the llvm-commits mailing list